Mercurial > hg > truffle
diff src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 1365:6476042f815c
6940701: Don't align loops in stubs for Niagara sparc
Summary: Don't align loops in stubs for Niagara sparc since NOPs are expensive.
Reviewed-by: twisti, never
author | kvn |
---|---|
date | Wed, 07 Apr 2010 09:37:47 -0700 |
parents | 0dc88ad3244e |
children | c640000b7cc1 |
line wrap: on
line diff
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Tue Apr 06 15:18:10 2010 -0700 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Wed Apr 07 09:37:47 2010 -0700 @@ -1148,7 +1148,7 @@ __ andn(from, 7, from); // Align address __ ldx(from, 0, O3); __ inc(from, 8); - __ align(16); + __ align(OptoLoopAlignment); __ BIND(L_loop); __ ldx(from, 0, O4); __ deccc(count, count_dec); // Can we do next iteration after this one? @@ -1220,7 +1220,7 @@ // __ andn(end_from, 7, end_from); // Align address __ ldx(end_from, 0, O3); - __ align(16); + __ align(OptoLoopAlignment); __ BIND(L_loop); __ ldx(end_from, -8, O4); __ deccc(count, count_dec); // Can we do next iteration after this one? @@ -1349,7 +1349,7 @@ __ BIND(L_copy_byte); __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); __ delayed()->nop(); - __ align(16); + __ align(OptoLoopAlignment); __ BIND(L_copy_byte_loop); __ ldub(from, offset, O3); __ deccc(count); @@ -1445,7 +1445,7 @@ L_aligned_copy, L_copy_byte); } // copy 4 elements (16 bytes) at a time - __ align(16); + __ align(OptoLoopAlignment); __ BIND(L_aligned_copy); __ dec(end_from, 16); __ ldx(end_from, 8, O3); @@ -1461,7 +1461,7 @@ __ BIND(L_copy_byte); __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); __ delayed()->nop(); - __ align(16); + __ align(OptoLoopAlignment); __ BIND(L_copy_byte_loop); __ dec(end_from); __ dec(end_to); @@ -1577,7 +1577,7 @@ __ BIND(L_copy_2_bytes); __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); __ delayed()->nop(); - __ align(16); + __ align(OptoLoopAlignment); __ BIND(L_copy_2_bytes_loop); __ lduh(from, offset, O3); __ deccc(count); @@ -1684,7 +1684,7 @@ L_aligned_copy, L_copy_2_bytes); } // copy 4 elements (16 bytes) at a time - __ align(16); + __ align(OptoLoopAlignment); __ BIND(L_aligned_copy); __ dec(end_from, 16); __ ldx(end_from, 8, O3); @@ -1781,7 +1781,7 @@ // copy with shift 4 elements (16 bytes) at a time __ dec(count, 4); // The cmp at the beginning guaranty count >= 4 - __ align(16); + __ align(OptoLoopAlignment); __ BIND(L_copy_16_bytes); __ ldx(from, 4, O4); __ deccc(count, 4); // Can we do next iteration after this one? @@ -1907,7 +1907,7 @@ // to form 2 aligned 8-bytes chunks to store. // __ ldx(end_from, -4, O3); - __ align(16); + __ align(OptoLoopAlignment); __ BIND(L_copy_16_bytes); __ ldx(end_from, -12, O4); __ deccc(count, 4); @@ -1929,7 +1929,7 @@ __ delayed()->inc(count, 4); // copy 4 elements (16 bytes) at a time - __ align(16); + __ align(OptoLoopAlignment); __ BIND(L_aligned_copy); __ dec(end_from, 16); __ ldx(end_from, 8, O3); @@ -2045,7 +2045,7 @@ __ mov(O3, count); __ mov(from, from64); - __ align(16); + __ align(OptoLoopAlignment); __ BIND(L_copy_64_bytes); for( int off = 0; off < 64; off += 16 ) { __ ldx(from64, off+0, O4); @@ -2065,7 +2065,7 @@ __ delayed()->add(offset0, 8, offset8); // Copy by 16 bytes chunks - __ align(16); + __ align(OptoLoopAlignment); __ BIND(L_copy_16_bytes); __ ldx(from, offset0, O3); __ ldx(from, offset8, G3); @@ -2139,7 +2139,7 @@ __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_8_bytes ); __ delayed()->sllx(count, LogBytesPerLong, offset8); __ sub(offset8, 8, offset0); - __ align(16); + __ align(OptoLoopAlignment); __ BIND(L_copy_16_bytes); __ ldx(from, offset8, O2); __ ldx(from, offset0, O3); @@ -2405,7 +2405,7 @@ // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays // (O2 = len; O2 != 0; O2--) --- number of oops *remaining* // G3, G4, G5 --- current oop, oop.klass, oop.klass.super - __ align(16); + __ align(OptoLoopAlignment); __ BIND(store_element); __ deccc(G1_remain); // decrement the count