# HG changeset patch # User adlertz # Date 1398422914 -7200 # Node ID 400709e275c166becd08f4d2ed7f05e32c2e3564 # Parent f1b83c6b8411aea3e8e2950f220ee0d5b99abcbc 8029302: Performance regression in Math.pow intrinsic Summary: Added special case for x^y where y == 2 Reviewed-by: kvn diff -r f1b83c6b8411 -r 400709e275c1 src/cpu/x86/vm/macroAssembler_x86.cpp --- a/src/cpu/x86/vm/macroAssembler_x86.cpp Thu May 08 23:10:23 2014 -0700 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Fri Apr 25 12:48:34 2014 +0200 @@ -3152,10 +3152,12 @@ // if fast computation is not possible, result is NaN. Requires // fallback from user of this macro. // increase precision for intermediate steps of the computation + BLOCK_COMMENT("fast_pow {"); increase_precision(); fyl2x(); // Stack: (Y*log2(X)) ... pow_exp_core_encoding(); // Stack: exp(X) ... restore_precision(); + BLOCK_COMMENT("} fast_pow"); } void MacroAssembler::fast_exp() { diff -r f1b83c6b8411 -r 400709e275c1 src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Thu May 08 23:10:23 2014 -0700 +++ b/src/share/vm/opto/library_call.cpp Fri Apr 25 12:48:34 2014 +0200 @@ -216,7 +216,7 @@ bool inline_math_subtractExactL(bool is_decrement); bool inline_exp(); bool inline_pow(); - void finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName); + Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName); bool inline_min_max(vmIntrinsics::ID id); Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y); // This returns Type::AnyPtr, RawPtr, or OopPtr. @@ -1678,7 +1678,7 @@ return true; } -void LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName) { +Node* LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName) { //------------------- //result=(result.isNaN())? funcAddr():result; // Check: If isNaN() by checking result!=result? then either trap @@ -1694,7 +1694,7 @@ uncommon_trap(Deoptimization::Reason_intrinsic, Deoptimization::Action_make_not_entrant); } - set_result(result); + return result; } else { // If this inlining ever returned NaN in the past, we compile a call // to the runtime to properly handle corner cases @@ -1724,9 +1724,10 @@ result_region->init_req(2, control()); result_val->init_req(2, value); - set_result(result_region, result_val); + set_control(_gvn.transform(result_region)); + return result_val; } else { - set_result(result); + return result; } } } @@ -1738,7 +1739,8 @@ Node* arg = round_double_node(argument(0)); Node* n = _gvn.transform(new (C) ExpDNode(C, control(), arg)); - finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP"); + n = finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP"); + set_result(n); C->set_has_split_ifs(true); // Has chance for split-if optimization return true; @@ -1748,27 +1750,48 @@ // Inline power instructions, if possible. bool LibraryCallKit::inline_pow() { // Pseudocode for pow - // if (x <= 0.0) { - // long longy = (long)y; - // if ((double)longy == y) { // if y is long - // if (y + 1 == y) longy = 0; // huge number: even - // result = ((1&longy) == 0)?-DPow(abs(x), y):DPow(abs(x), y); + // if (y == 2) { + // return x * x; + // } else { + // if (x <= 0.0) { + // long longy = (long)y; + // if ((double)longy == y) { // if y is long + // if (y + 1 == y) longy = 0; // huge number: even + // result = ((1&longy) == 0)?-DPow(abs(x), y):DPow(abs(x), y); + // } else { + // result = NaN; + // } // } else { - // result = NaN; + // result = DPow(x,y); // } - // } else { - // result = DPow(x,y); + // if (result != result)? { + // result = uncommon_trap() or runtime_call(); + // } + // return result; // } - // if (result != result)? { - // result = uncommon_trap() or runtime_call(); - // } - // return result; Node* x = round_double_node(argument(0)); Node* y = round_double_node(argument(2)); Node* result = NULL; + Node* const_two_node = makecon(TypeD::make(2.0)); + Node* cmp_node = _gvn.transform(new (C) CmpDNode(y, const_two_node)); + Node* bool_node = _gvn.transform(new (C) BoolNode(cmp_node, BoolTest::eq)); + IfNode* if_node = create_and_xform_if(control(), bool_node, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN); + Node* if_true = _gvn.transform(new (C) IfTrueNode(if_node)); + Node* if_false = _gvn.transform(new (C) IfFalseNode(if_node)); + + RegionNode* region_node = new (C) RegionNode(3); + region_node->init_req(1, if_true); + + Node* phi_node = new (C) PhiNode(region_node, Type::DOUBLE); + // special case for x^y where y == 2, we can convert it to x * x + phi_node->init_req(1, _gvn.transform(new (C) MulDNode(x, x))); + + // set control to if_false since we will now process the false branch + set_control(if_false); + if (!too_many_traps(Deoptimization::Reason_intrinsic)) { // Short form: skip the fancy tests and just check for NaN result. result = _gvn.transform(new (C) PowDNode(C, control(), x, y)); @@ -1892,7 +1915,15 @@ result = _gvn.transform(phi); } - finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW"); + result = finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW"); + + // control from finish_pow_exp is now input to the region node + region_node->set_req(2, control()); + // the result from finish_pow_exp is now input to the phi node + phi_node->init_req(2, _gvn.transform(result)); + set_control(_gvn.transform(region_node)); + record_for_igvn(region_node); + set_result(_gvn.transform(phi_node)); C->set_has_split_ifs(true); // Has chance for split-if optimization return true;