# HG changeset patch
# User adlertz
# Date 1398422914 -7200
# Node ID 400709e275c166becd08f4d2ed7f05e32c2e3564
# Parent  f1b83c6b8411aea3e8e2950f220ee0d5b99abcbc
8029302: Performance regression in Math.pow intrinsic
Summary: Added special case for x^y where y == 2
Reviewed-by: kvn

diff -r f1b83c6b8411 -r 400709e275c1 src/cpu/x86/vm/macroAssembler_x86.cpp
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu May 08 23:10:23 2014 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri Apr 25 12:48:34 2014 +0200
@@ -3152,10 +3152,12 @@
   // if fast computation is not possible, result is NaN. Requires
   // fallback from user of this macro.
   // increase precision for intermediate steps of the computation
+  BLOCK_COMMENT("fast_pow {");
   increase_precision();
   fyl2x();                 // Stack: (Y*log2(X)) ...
   pow_exp_core_encoding(); // Stack: exp(X) ...
   restore_precision();
+  BLOCK_COMMENT("} fast_pow");
 }
 
 void MacroAssembler::fast_exp() {
diff -r f1b83c6b8411 -r 400709e275c1 src/share/vm/opto/library_call.cpp
--- a/src/share/vm/opto/library_call.cpp	Thu May 08 23:10:23 2014 -0700
+++ b/src/share/vm/opto/library_call.cpp	Fri Apr 25 12:48:34 2014 +0200
@@ -216,7 +216,7 @@
   bool inline_math_subtractExactL(bool is_decrement);
   bool inline_exp();
   bool inline_pow();
-  void finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
+  Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
   bool inline_min_max(vmIntrinsics::ID id);
   Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
   // This returns Type::AnyPtr, RawPtr, or OopPtr.
@@ -1678,7 +1678,7 @@
   return true;
 }
 
-void LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName) {
+Node* LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName) {
   //-------------------
   //result=(result.isNaN())? funcAddr():result;
   // Check: If isNaN() by checking result!=result? then either trap
@@ -1694,7 +1694,7 @@
       uncommon_trap(Deoptimization::Reason_intrinsic,
                     Deoptimization::Action_make_not_entrant);
     }
-    set_result(result);
+    return result;
   } else {
     // If this inlining ever returned NaN in the past, we compile a call
     // to the runtime to properly handle corner cases
@@ -1724,9 +1724,10 @@
 
       result_region->init_req(2, control());
       result_val->init_req(2, value);
-      set_result(result_region, result_val);
+      set_control(_gvn.transform(result_region));
+      return result_val;
     } else {
-      set_result(result);
+      return result;
     }
   }
 }
@@ -1738,7 +1739,8 @@
   Node* arg = round_double_node(argument(0));
   Node* n   = _gvn.transform(new (C) ExpDNode(C, control(), arg));
 
-  finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
+  n = finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
+  set_result(n);
 
   C->set_has_split_ifs(true); // Has chance for split-if optimization
   return true;
@@ -1748,27 +1750,48 @@
 // Inline power instructions, if possible.
 bool LibraryCallKit::inline_pow() {
   // Pseudocode for pow
-  // if (x <= 0.0) {
-  //   long longy = (long)y;
-  //   if ((double)longy == y) { // if y is long
-  //     if (y + 1 == y) longy = 0; // huge number: even
-  //     result = ((1&longy) == 0)?-DPow(abs(x), y):DPow(abs(x), y);
+  // if (y == 2) {
+  //   return x * x;
+  // } else {
+  //   if (x <= 0.0) {
+  //     long longy = (long)y;
+  //     if ((double)longy == y) { // if y is long
+  //       if (y + 1 == y) longy = 0; // huge number: even
+  //       result = ((1&longy) == 0)?DPow(abs(x), y):-DPow(abs(x), y);
+  //     } else {
+  //       result = NaN;
+  //     }
   //   } else {
-  //     result = NaN;
+  //     result = DPow(x,y);
   //   }
-  // } else {
-  //   result = DPow(x,y);
+  //   if (result != result) { // i.e. result is NaN
+  //     result = uncommon_trap() or runtime_call();
+  //   }
+  //   return result;
   // }
-  // if (result != result)?  {
-  //   result = uncommon_trap() or runtime_call();
-  // }
-  // return result;
 
   Node* x = round_double_node(argument(0));
   Node* y = round_double_node(argument(2));
 
   Node* result = NULL;
 
+  Node*   const_two_node = makecon(TypeD::make(2.0));
+  Node*   cmp_node       = _gvn.transform(new (C) CmpDNode(y, const_two_node));
+  Node*   bool_node      = _gvn.transform(new (C) BoolNode(cmp_node, BoolTest::eq));
+  IfNode* if_node        = create_and_xform_if(control(), bool_node, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
+  Node*   if_true        = _gvn.transform(new (C) IfTrueNode(if_node));
+  Node*   if_false       = _gvn.transform(new (C) IfFalseNode(if_node));
+
+  RegionNode* region_node = new (C) RegionNode(3);
+  region_node->init_req(1, if_true);
+
+  Node* phi_node = new (C) PhiNode(region_node, Type::DOUBLE);
+  // special case for x^y where y == 2, we can convert it to x * x
+  phi_node->init_req(1, _gvn.transform(new (C) MulDNode(x, x)));
+
+  // set control to if_false since we will now process the false branch
+  set_control(if_false);
+
   if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
     // Short form: skip the fancy tests and just check for NaN result.
     result = _gvn.transform(new (C) PowDNode(C, control(), x, y));
@@ -1892,7 +1915,15 @@
     result = _gvn.transform(phi);
   }
 
-  finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
+  result = finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
+
+  // control from finish_pow_exp is now input to the region node
+  region_node->set_req(2, control());
+  // the result from finish_pow_exp is now input to the phi node
+  phi_node->init_req(2, _gvn.transform(result));
+  set_control(_gvn.transform(region_node));
+  record_for_igvn(region_node);
+  set_result(_gvn.transform(phi_node));
 
   C->set_has_split_ifs(true); // Has chance for split-if optimization
   return true;