comparison src/cpu/sparc/vm/assembler_sparc.cpp @ 3892:baf763f388e6

7059037: Use BIS for zeroing on T4 Summary: Use BIS for zeroing newly allocated big (2Kb and more) objects and arrays. Reviewed-by: never, twisti, ysr
author kvn
date Fri, 26 Aug 2011 08:52:22 -0700
parents 3d42f82cd811
children a64d352d1118
comparison
equal deleted inserted replaced
3891:ac8738449b6f 3892:baf763f388e6
4971 delayed()->lduw(ary1, limit, chr1); // hoisted 4971 delayed()->lduw(ary1, limit, chr1); // hoisted
4972 4972
4973 // Caller should set it: 4973 // Caller should set it:
4974 // add(G0, 1, result); // equals 4974 // add(G0, 1, result); // equals
4975 } 4975 }
4976
4977 // Use BIS for zeroing (count is in bytes).
//
// Emits code that zeroes the memory region starting at address 'to'.
// "BIS" here is the stxa with ASI_ST_BLKINIT_PRIMARY issued in bis_loop below.
//
// Shape of the emitted code:
//   - count <= 0: branch straight to Ldone;
//   - count < block_zero_size: zero with the plain 8-byte store loop (small_loop);
//   - otherwise: zero the first cache line with plain stores, then the
//     cache-line-aligned middle with BIS stores followed by a membar, then
//     whatever remains with the same 8-byte store loop.
//
// to    - start address; advanced by the emitted code (clobbered).
// count - size in bytes; its register is reused as 'end' (= to + count),
//         so it is clobbered as well.
// temp  - scratch; written only when block_zero_size does not fit in a
//         simm13 immediate.
// Ldone - target of the early exits. It is NOT bound here; once the tail
//         loop finishes the emitted code simply falls through past the
//         final nop().
//
// NOTE(review): the 8-byte stores and the alignment mask appear to assume
// that 'to' and count are 8-byte aligned and that cache_line_size is a
// power of two - confirm against the callers and VM_Version.
4978 void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
4979 assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");
4980 Register end = count; // 'end' aliases the count register; no extra register is used
4981 int cache_line_size = VM_Version::prefetch_data_size();
4982 // Minimum count when BIS zeroing can be used since
4983 // it needs membar which is expensive.
4984 int block_zero_size = MAX2(cache_line_size*3, (int)BlockZeroingLowLimit); // never less than three cache lines
4985
4986 Label small_loop;
4987 // Check if count is negative (dead code) or zero.
4988 // Note, count uses 64bit in 64 bit VM.
4989 cmp_and_brx_short(count, 0, Assembler::lessEqual, Assembler::pn, Ldone);
4990
4991 // Use BIS zeroing only for big arrays since it requires membar.
4992 if (Assembler::is_simm13(block_zero_size)) { // fits in a 13-bit signed immediate (< 4096)
4993 cmp(count, block_zero_size);
4994 } else {
4995 set(block_zero_size, temp); // too large for an immediate: materialize it in temp first
4996 cmp(count, temp);
4997 }
4998 br(Assembler::lessUnsigned, false, Assembler::pt, small_loop);
4999 delayed()->add(to, count, end); // delay slot: end = to + count (overwrites count); small_loop compares against this value too
5000 // NOTE(review): the 'false' above is presumably the annul bit, i.e. the delay slot runs on both paths - confirm.
5001 // Note: on this path count >= block_zero_size, i.e. at least three cache lines
// (the "32 bytes" of the original note is presumably the line size on the target CPU - TODO confirm).
5002
5003 // Clean the beginning of space up to next cache line: this C++ loop runs at
// code-generation time and emits cache_line_size/8 8-byte stores of G0 at to+0, to+8, ...
5004 for (int offs = 0; offs < cache_line_size; offs += 8) {
5005 stx(G0, to, offs);
5006 }
5007
5008 // Align to next cache line: to = (to + cache_line_size) & -cache_line_size.
5009 add(to, cache_line_size, to);
5010 and3(to, -cache_line_size, to);
5011
5012 // Note: 'to' moved forward by at most one cache line, so at least two
// cache lines are left between 'to' and 'end'.
5013
5014 // BIS should not be used to zero tail (the last 2*cache_line_size - 8 bytes)
5015 // to avoid zeroing a header of the following object.
5016 sub(end, (cache_line_size*2)-8, end); // temporarily lower 'end' so bis_loop stops short of the tail
5017
5018 Label bis_loop;
5019 bind(bis_loop);
5020 stxa(G0, to, G0, Assembler::ASI_ST_BLKINIT_PRIMARY); // BIS store at 'to' (presumably initializes the whole line - the loop steps by cache_line_size)
5021 add(to, cache_line_size, to);
5022 cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, bis_loop); // test is at the bottom: at least one BIS store is always issued
5023
5024 // BIS needs membar.
5025 membar(Assembler::StoreLoad);
5026
5027 add(end, (cache_line_size*2)-8, end); // restore end
5028 cmp_and_brx_short(to, end, Assembler::greaterEqualUnsigned, Assembler::pn, Ldone); // nothing left after the BIS stores: done
5029
5030 // Clean the tail (also the whole region when count < block_zero_size): 8 bytes per iteration.
5031 bind(small_loop);
5032 stx(G0, to, 0);
5033 add(to, 8, to);
5034 cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, small_loop); // test is at the bottom: at least one 8-byte store is always issued
5035 nop(); // Separate short branches
5036 }
5037