Mercurial > hg > graal-jvmci-8
comparison src/cpu/sparc/vm/assembler_sparc.cpp @ 3892:baf763f388e6
7059037: Use BIS for zeroing on T4
Summary: Use BIS for zeroing new allocated big (2Kb and more) objects and arrays.
Reviewed-by: never, twisti, ysr
author | kvn |
---|---|
date | Fri, 26 Aug 2011 08:52:22 -0700 |
parents | 3d42f82cd811 |
children | a64d352d1118 |
comparison
equal
deleted
inserted
replaced
3891:ac8738449b6f | 3892:baf763f388e6 |
---|---|
4971 delayed()->lduw(ary1, limit, chr1); // hoisted | 4971 delayed()->lduw(ary1, limit, chr1); // hoisted |
4972 | 4972 |
4973 // Caller should set it: | 4973 // Caller should set it: |
4974 // add(G0, 1, result); // equals | 4974 // add(G0, 1, result); // equals |
4975 } | 4975 } |
4976 | |
4977 // Use BIS for zeroing (count is in bytes). | |
4978 void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) { | |
4979 assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing"); | |
4980 Register end = count; | |
4981 int cache_line_size = VM_Version::prefetch_data_size(); | |
4982 // Minimum count when BIS zeroing can be used since | |
4983 // it needs membar which is expensive. | |
4984 int block_zero_size = MAX2(cache_line_size*3, (int)BlockZeroingLowLimit); | |
4985 | |
4986 Label small_loop; | |
4987 // Check if count is negative (dead code) or zero. | |
4988 // Note, count uses 64bit in 64 bit VM. | |
4989 cmp_and_brx_short(count, 0, Assembler::lessEqual, Assembler::pn, Ldone); | |
4990 | |
4991 // Use BIS zeroing only for big arrays since it requires membar. | |
4992 if (Assembler::is_simm13(block_zero_size)) { // < 4096 | |
4993 cmp(count, block_zero_size); | |
4994 } else { | |
4995 set(block_zero_size, temp); | |
4996 cmp(count, temp); | |
4997 } | |
4998 br(Assembler::lessUnsigned, false, Assembler::pt, small_loop); | |
4999 delayed()->add(to, count, end); | |
5000 | |
5001 // Note: size is >= three (32 bytes) cache lines. | |
5002 | |
5003 // Clean the beginning of space up to next cache line. | |
5004 for (int offs = 0; offs < cache_line_size; offs += 8) { | |
5005 stx(G0, to, offs); | |
5006 } | |
5007 | |
5008 // align to next cache line | |
5009 add(to, cache_line_size, to); | |
5010 and3(to, -cache_line_size, to); | |
5011 | |
5012 // Note: size left >= two (32 bytes) cache lines. | |
5013 | |
5014 // BIS should not be used to zero tail (64 bytes) | |
5015 // to avoid zeroing a header of the following object. | |
5016 sub(end, (cache_line_size*2)-8, end); | |
5017 | |
5018 Label bis_loop; | |
5019 bind(bis_loop); | |
5020 stxa(G0, to, G0, Assembler::ASI_ST_BLKINIT_PRIMARY); | |
5021 add(to, cache_line_size, to); | |
5022 cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, bis_loop); | |
5023 | |
5024 // BIS needs membar. | |
5025 membar(Assembler::StoreLoad); | |
5026 | |
5027 add(end, (cache_line_size*2)-8, end); // restore end | |
5028 cmp_and_brx_short(to, end, Assembler::greaterEqualUnsigned, Assembler::pn, Ldone); | |
5029 | |
5030 // Clean the tail. | |
5031 bind(small_loop); | |
5032 stx(G0, to, 0); | |
5033 add(to, 8, to); | |
5034 cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, small_loop); | |
5035 nop(); // Separate short branches | |
5036 } | |
5037 |