diff src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 1365:6476042f815c

6940701: Don't align loops in stubs for Niagara sparc Summary: Don't align loops in stubs for Niagara sparc since NOPs are expensive. Reviewed-by: twisti, never
author kvn
date Wed, 07 Apr 2010 09:37:47 -0700
parents 0dc88ad3244e
children c640000b7cc1
line wrap: on
line diff
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Tue Apr 06 15:18:10 2010 -0700
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Apr 07 09:37:47 2010 -0700
@@ -1148,7 +1148,7 @@
       __ andn(from, 7, from);     // Align address
       __ ldx(from, 0, O3);
       __ inc(from, 8);
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_loop);
       __ ldx(from, 0, O4);
       __ deccc(count, count_dec); // Can we do next iteration after this one?
@@ -1220,7 +1220,7 @@
     //
       __ andn(end_from, 7, end_from);     // Align address
       __ ldx(end_from, 0, O3);
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_loop);
       __ ldx(end_from, -8, O4);
       __ deccc(count, count_dec); // Can we do next iteration after this one?
@@ -1349,7 +1349,7 @@
     __ BIND(L_copy_byte);
       __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
       __ delayed()->nop();
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_copy_byte_loop);
       __ ldub(from, offset, O3);
       __ deccc(count);
@@ -1445,7 +1445,7 @@
                                         L_aligned_copy, L_copy_byte);
     }
     // copy 4 elements (16 bytes) at a time
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_aligned_copy);
       __ dec(end_from, 16);
       __ ldx(end_from, 8, O3);
@@ -1461,7 +1461,7 @@
     __ BIND(L_copy_byte);
       __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
       __ delayed()->nop();
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_copy_byte_loop);
       __ dec(end_from);
       __ dec(end_to);
@@ -1577,7 +1577,7 @@
     __ BIND(L_copy_2_bytes);
       __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
       __ delayed()->nop();
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_copy_2_bytes_loop);
       __ lduh(from, offset, O3);
       __ deccc(count);
@@ -1684,7 +1684,7 @@
                                         L_aligned_copy, L_copy_2_bytes);
     }
     // copy 4 elements (16 bytes) at a time
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_aligned_copy);
       __ dec(end_from, 16);
       __ ldx(end_from, 8, O3);
@@ -1781,7 +1781,7 @@
     // copy with shift 4 elements (16 bytes) at a time
       __ dec(count, 4);   // The cmp at the beginning guaranty count >= 4
 
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_copy_16_bytes);
       __ ldx(from, 4, O4);
       __ deccc(count, 4); // Can we do next iteration after this one?
@@ -1907,7 +1907,7 @@
     // to form 2 aligned 8-bytes chunks to store.
     //
       __ ldx(end_from, -4, O3);
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_copy_16_bytes);
       __ ldx(end_from, -12, O4);
       __ deccc(count, 4);
@@ -1929,7 +1929,7 @@
       __ delayed()->inc(count, 4);
 
     // copy 4 elements (16 bytes) at a time
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_aligned_copy);
       __ dec(end_from, 16);
       __ ldx(end_from, 8, O3);
@@ -2045,7 +2045,7 @@
       __ mov(O3, count);
       __ mov(from, from64);
 
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_copy_64_bytes);
       for( int off = 0; off < 64; off += 16 ) {
         __ ldx(from64,  off+0, O4);
@@ -2065,7 +2065,7 @@
       __ delayed()->add(offset0, 8, offset8);
 
       // Copy by 16 bytes chunks
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_copy_16_bytes);
       __ ldx(from, offset0, O3);
       __ ldx(from, offset8, G3);
@@ -2139,7 +2139,7 @@
       __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_8_bytes );
       __ delayed()->sllx(count, LogBytesPerLong, offset8);
       __ sub(offset8, 8, offset0);
-      __ align(16);
+      __ align(OptoLoopAlignment);
     __ BIND(L_copy_16_bytes);
       __ ldx(from, offset8, O2);
       __ ldx(from, offset0, O3);
@@ -2405,7 +2405,7 @@
     //   (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
     //   (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
     //   G3, G4, G5 --- current oop, oop.klass, oop.klass.super
-    __ align(16);
+    __ align(OptoLoopAlignment);
 
     __ BIND(store_element);
     __ deccc(G1_remain);                // decrement the count