changeset 219:ab65a4c9b2e8

6708714: Optimize long LShift on 32-bits x86 Summary: For small (1-3 bits) left long shifts in 32-bits VM use sets of add+addc instructions instead of shld+shl on new AMD cpus. Reviewed-by: never Contributed-by: shrinivas.joshi@amd.com
author kvn
date Mon, 23 Jun 2008 14:11:12 -0700
parents 411c61adc994
children 30369db7f5d2
files src/cpu/x86/vm/vm_version_x86_32.cpp src/cpu/x86/vm/x86_32.ad src/share/vm/runtime/globals.hpp
diffstat 3 files changed, 91 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/x86/vm/vm_version_x86_32.cpp	Sat Jun 21 10:03:31 2008 -0700
+++ b/src/cpu/x86/vm/vm_version_x86_32.cpp	Mon Jun 23 14:11:12 2008 -0700
@@ -307,6 +307,10 @@
       // Use it on new AMD cpus starting from Opteron.
       UseAddressNop = true;
     }
+    if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
+      // Use it on new AMD cpus starting from Opteron.
+      UseNewLongLShift = true;
+    }
     if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
       if( supports_sse4a() ) {
         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
--- a/src/cpu/x86/vm/x86_32.ad	Sat Jun 21 10:03:31 2008 -0700
+++ b/src/cpu/x86/vm/x86_32.ad	Mon Jun 23 14:11:12 2008 -0700
@@ -4754,6 +4754,33 @@
   interface(CONST_INTER);
 %}
 
+operand immI_1() %{
+  predicate( n->get_int() == 1 );  // accept only a constant whose int value is exactly 1
+  match(ConI);  // candidate nodes are integer constants (ConI)
+
+  op_cost(0);  // operand cost is declared as zero
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_2() %{
+  predicate( n->get_int() == 2 );  // accept only a constant whose int value is exactly 2
+  match(ConI);  // candidate nodes are integer constants (ConI)
+
+  op_cost(0);  // operand cost is declared as zero
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_3() %{
+  predicate( n->get_int() == 3 );  // accept only a constant whose int value is exactly 3
+  match(ConI);  // candidate nodes are integer constants (ConI)
+
+  op_cost(0);  // operand cost is declared as zero
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 // Pointer Immediate
 operand immP() %{
   match(ConP);
@@ -8943,6 +8970,63 @@
   ins_pipe( ialu_reg_long_mem );
 %}
 
+// Shift Left Long by 1: x + x equals x << 1, so a single ADD/ADC pair over the lo/hi halves performs the shift
+instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
+  predicate(UseNewLongLShift);  // rule is eligible only when this flag is set
+  match(Set dst (LShiftL dst cnt));  // in-place long shift; cnt is restricted to the constant 1 by immI_1
+  effect(KILL cr);  // ADD and ADC both write the condition flags
+  ins_cost(100);
+  format %{ "ADD    $dst.lo,$dst.lo\n\t"
+            "ADC    $dst.hi,$dst.hi" %}
+  ins_encode %{
+    __ addl($dst$$Register,$dst$$Register);  // double the low word; its former top bit becomes the carry
+    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));  // double the high word plus carry-in; HIGH_FROM_LOW presumably names the high-half register - confirm
+  %}
+  ins_pipe( ialu_reg_long );
+%}
+
+// Shift Left Long by 2: two back-to-back ADD/ADC pairs, each pair doubling the 64-bit value once
+instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
+  predicate(UseNewLongLShift);  // rule is eligible only when this flag is set
+  match(Set dst (LShiftL dst cnt));  // in-place long shift; cnt is restricted to the constant 2 by immI_2
+  effect(KILL cr);  // ADD and ADC both write the condition flags
+  ins_cost(100);
+  format %{ "ADD    $dst.lo,$dst.lo\n\t"
+            "ADC    $dst.hi,$dst.hi\n\t" 
+            "ADD    $dst.lo,$dst.lo\n\t"
+            "ADC    $dst.hi,$dst.hi" %}
+  ins_encode %{
+    __ addl($dst$$Register,$dst$$Register);  // first doubling: low word, top bit goes to carry
+    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));  // first doubling: high word plus carry-in; HIGH_FROM_LOW presumably names the high-half register - confirm
+    __ addl($dst$$Register,$dst$$Register);  // second doubling: low word
+    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));  // second doubling: high word plus carry-in
+  %}
+  ins_pipe( ialu_reg_long );
+%}
+
+// Shift Left Long by 3: three back-to-back ADD/ADC pairs, each pair doubling the 64-bit value once
+instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
+  predicate(UseNewLongLShift);  // rule is eligible only when this flag is set
+  match(Set dst (LShiftL dst cnt));  // in-place long shift; cnt is restricted to the constant 3 by immI_3
+  effect(KILL cr);  // ADD and ADC both write the condition flags
+  ins_cost(100);
+  format %{ "ADD    $dst.lo,$dst.lo\n\t"
+            "ADC    $dst.hi,$dst.hi\n\t" 
+            "ADD    $dst.lo,$dst.lo\n\t"
+            "ADC    $dst.hi,$dst.hi\n\t" 
+            "ADD    $dst.lo,$dst.lo\n\t"
+            "ADC    $dst.hi,$dst.hi" %}
+  ins_encode %{
+    __ addl($dst$$Register,$dst$$Register);  // first doubling: low word, top bit goes to carry
+    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));  // first doubling: high word plus carry-in; HIGH_FROM_LOW presumably names the high-half register - confirm
+    __ addl($dst$$Register,$dst$$Register);  // second doubling: low word
+    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));  // second doubling: high word plus carry-in
+    __ addl($dst$$Register,$dst$$Register);  // third doubling: low word
+    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));  // third doubling: high word plus carry-in
+  %}
+  ins_pipe( ialu_reg_long );
+%}
+
 // Shift Left Long by 1-31
 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
   match(Set dst (LShiftL dst cnt));
--- a/src/share/vm/runtime/globals.hpp	Sat Jun 21 10:03:31 2008 -0700
+++ b/src/share/vm/runtime/globals.hpp	Mon Jun 23 14:11:12 2008 -0700
@@ -946,6 +946,9 @@
   diagnostic(bool, UseIncDec, true,                                         \
           "Use INC, DEC instructions on x86")                               \
                                                                             \
+  product(bool, UseNewLongLShift, false,                                    \
+          "Use optimized bitwise shift left")                               \
+                                                                            \
   product(bool, UseStoreImmI16, true,                                       \
           "Use store immediate 16-bits value instruction on x86")           \
                                                                             \