# HG changeset patch # User roland # Date 1360670171 -3600 # Node ID 8b3da8d14c93394a940599cb1a1cdcdb91c856e2 # Parent 1e5e28bac299b795ff9420058fe1b8b0b78b9675 7197327: 40% regression on 8 b41 comp 8 b40 on specjvm2008.mpegaudio on oob Summary: Add support for expensive nodes. Reviewed-by: kvn diff -r 1e5e28bac299 -r 8b3da8d14c93 src/share/vm/opto/c2_globals.hpp --- a/src/share/vm/opto/c2_globals.hpp Mon Feb 11 14:47:04 2013 -0800 +++ b/src/share/vm/opto/c2_globals.hpp Tue Feb 12 12:56:11 2013 +0100 @@ -618,6 +618,9 @@ \ product(intx, LiveNodeCountInliningCutoff, 20000, \ "max number of live nodes in a method") \ + \ + diagnostic(bool, OptimizeExpensiveOps, true, \ + "Find best control for expensive operations") \ C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG) diff -r 1e5e28bac299 -r 8b3da8d14c93 src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Mon Feb 11 14:47:04 2013 -0800 +++ b/src/share/vm/opto/compile.cpp Tue Feb 12 12:56:11 2013 +0100 @@ -409,6 +409,13 @@ remove_macro_node(n); } } + // Remove useless expensive node + for (int i = C->expensive_count()-1; i >= 0; i--) { + Node* n = C->expensive_node(i); + if (!useful.member(n)) { + remove_expensive_node(n); + } + } // clean up the late inline lists remove_useless_late_inlines(&_string_late_inlines, useful); remove_useless_late_inlines(&_late_inlines, useful); @@ -1061,6 +1068,7 @@ _intrinsics = NULL; _macro_nodes = new(comp_arena()) GrowableArray(comp_arena(), 8, 0, NULL); _predicate_opaqs = new(comp_arena()) GrowableArray(comp_arena(), 8, 0, NULL); + _expensive_nodes = new(comp_arena()) GrowableArray(comp_arena(), 8, 0, NULL); register_library_intrinsics(); } @@ -1927,6 +1935,10 @@ if (failing()) return; + // No more new expensive nodes will be added to the list from here + // so keep only the actual candidates for optimizations. + cleanup_expensive_nodes(igvn); + // Perform escape analysis if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) { if (has_loops()) { @@ -3010,6 +3022,15 @@ return true; } + // Expensive nodes have their control input set to prevent the GVN + // from freely commoning them. There's no GVN beyond this point so + // no need to keep the control input. We want the expensive nodes to + // be freely moved to the least frequent code path by gcm. + assert(OptimizeExpensiveOps || expensive_count() == 0, "optimization off but list non empty?"); + for (int i = 0; i < expensive_count(); i++) { + _expensive_nodes->at(i)->set_req(0, NULL); + } + Final_Reshape_Counts frc; // Visit everybody reachable! @@ -3525,3 +3546,126 @@ } } } + +int Compile::cmp_expensive_nodes(Node* n1, Node* n2) { + if (n1->Opcode() < n2->Opcode()) return -1; + else if (n1->Opcode() > n2->Opcode()) return 1; + + assert(n1->req() == n2->req(), err_msg_res("can't compare %s nodes: n1->req() = %d, n2->req() = %d", NodeClassNames[n1->Opcode()], n1->req(), n2->req())); + for (uint i = 1; i < n1->req(); i++) { + if (n1->in(i) < n2->in(i)) return -1; + else if (n1->in(i) > n2->in(i)) return 1; + } + + return 0; +} + +int Compile::cmp_expensive_nodes(Node** n1p, Node** n2p) { + Node* n1 = *n1p; + Node* n2 = *n2p; + + return cmp_expensive_nodes(n1, n2); +} + +void Compile::sort_expensive_nodes() { + if (!expensive_nodes_sorted()) { + _expensive_nodes->sort(cmp_expensive_nodes); + } +} + +bool Compile::expensive_nodes_sorted() const { + for (int i = 1; i < _expensive_nodes->length(); i++) { + if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i-1)) < 0) { + return false; + } + } + return true; +} + +bool Compile::should_optimize_expensive_nodes(PhaseIterGVN &igvn) { + if (_expensive_nodes->length() == 0) { + return false; + } + + assert(OptimizeExpensiveOps, "optimization off?"); + + // Take this opportunity to remove dead nodes from the list + int j = 0; + for (int i = 0; i < _expensive_nodes->length(); i++) { + Node* n = _expensive_nodes->at(i); + if (!n->is_unreachable(igvn)) { + assert(n->is_expensive(), "should be expensive"); + _expensive_nodes->at_put(j, n); + j++; + } + } + _expensive_nodes->trunc_to(j); + + // Then sort the list so that similar nodes are next to each other + // and check for at least two nodes of identical kind with same data + // inputs. + sort_expensive_nodes(); + + for (int i = 0; i < _expensive_nodes->length()-1; i++) { + if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i+1)) == 0) { + return true; + } + } + + return false; +} + +void Compile::cleanup_expensive_nodes(PhaseIterGVN &igvn) { + if (_expensive_nodes->length() == 0) { + return; + } + + assert(OptimizeExpensiveOps, "optimization off?"); + + // Sort to bring similar nodes next to each other and clear the + // control input of nodes for which there's only a single copy. + sort_expensive_nodes(); + + int j = 0; + int identical = 0; + int i = 0; + for (; i < _expensive_nodes->length()-1; i++) { + assert(j <= i, "can't write beyond current index"); + if (_expensive_nodes->at(i)->Opcode() == _expensive_nodes->at(i+1)->Opcode()) { + identical++; + _expensive_nodes->at_put(j++, _expensive_nodes->at(i)); + continue; + } + if (identical > 0) { + _expensive_nodes->at_put(j++, _expensive_nodes->at(i)); + identical = 0; + } else { + Node* n = _expensive_nodes->at(i); + igvn.hash_delete(n); + n->set_req(0, NULL); + igvn.hash_insert(n); + } + } + if (identical > 0) { + _expensive_nodes->at_put(j++, _expensive_nodes->at(i)); + } else if (_expensive_nodes->length() >= 1) { + Node* n = _expensive_nodes->at(i); + igvn.hash_delete(n); + n->set_req(0, NULL); + igvn.hash_insert(n); + } + _expensive_nodes->trunc_to(j); +} + +void Compile::add_expensive_node(Node * n) { + assert(!_expensive_nodes->contains(n), "duplicate entry in expensive list"); + assert(n->is_expensive(), "expensive nodes with non-null control here only"); + assert(!n->is_CFG() && !n->is_Mem(), "no cfg or memory nodes here"); + if (OptimizeExpensiveOps) { + _expensive_nodes->append(n); + } else { + // Clear control input and let IGVN optimize expensive nodes if + // OptimizeExpensiveOps is off. + n->set_req(0, NULL); + } +} diff -r 1e5e28bac299 -r 8b3da8d14c93 src/share/vm/opto/compile.hpp --- a/src/share/vm/opto/compile.hpp Mon Feb 11 14:47:04 2013 -0800 +++ b/src/share/vm/opto/compile.hpp Tue Feb 12 12:56:11 2013 +0100 @@ -314,6 +314,7 @@ GrowableArray* _intrinsics; // List of intrinsics. GrowableArray* _macro_nodes; // List of nodes which need to be expanded before matching. GrowableArray* _predicate_opaqs; // List of Opaque1 nodes for the loop predicates. + GrowableArray* _expensive_nodes; // List of nodes that are expensive to compute and that we'd better not let the GVN freely common ConnectionGraph* _congraph; #ifndef PRODUCT IdealGraphPrinter* _printer; @@ -398,6 +399,13 @@ GrowableArray* _print_inlining_list; int _print_inlining; + // Only keep nodes in the expensive node list that need to be optimized + void cleanup_expensive_nodes(PhaseIterGVN &igvn); + // Use for sorting expensive nodes to bring similar nodes together + static int cmp_expensive_nodes(Node** n1, Node** n2); + // Expensive nodes list already sorted? + bool expensive_nodes_sorted() const; + public: outputStream* print_inlining_stream() const { @@ -573,8 +581,10 @@ int macro_count() { return _macro_nodes->length(); } int predicate_count() { return _predicate_opaqs->length();} + int expensive_count() { return _expensive_nodes->length(); } Node* macro_node(int idx) { return _macro_nodes->at(idx); } Node* predicate_opaque1_node(int idx) { return _predicate_opaqs->at(idx);} + Node* expensive_node(int idx) { return _expensive_nodes->at(idx); } ConnectionGraph* congraph() { return _congraph;} void set_congraph(ConnectionGraph* congraph) { _congraph = congraph;} void add_macro_node(Node * n) { @@ -592,6 +602,12 @@ _predicate_opaqs->remove(n); } } + void add_expensive_node(Node * n); + void remove_expensive_node(Node * n) { + if (_expensive_nodes->contains(n)) { + _expensive_nodes->remove(n); + } + } void add_predicate_opaq(Node * n) { assert(!_predicate_opaqs->contains(n), " duplicate entry in predicate opaque1"); assert(_macro_nodes->contains(n), "should have already been in macro list"); @@ -604,6 +620,13 @@ return _predicate_opaqs->contains(n); } + // Are there candidate expensive nodes for optimization? + bool should_optimize_expensive_nodes(PhaseIterGVN &igvn); + // Check whether n1 and n2 are similar + static int cmp_expensive_nodes(Node* n1, Node* n2); + // Sort expensive nodes to locate similar expensive nodes + void sort_expensive_nodes(); + // Compilation environment. Arena* comp_arena() { return &_comp_arena; } ciEnv* env() const { return _env; } diff -r 1e5e28bac299 -r 8b3da8d14c93 src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Mon Feb 11 14:47:04 2013 -0800 +++ b/src/share/vm/opto/library_call.cpp Tue Feb 12 12:56:11 2013 +0100 @@ -1653,7 +1653,7 @@ // really odd corner cases (+/- Infinity). Just uncommon-trap them. bool LibraryCallKit::inline_exp() { Node* arg = round_double_node(argument(0)); - Node* n = _gvn.transform(new (C) ExpDNode(0, arg)); + Node* n = _gvn.transform(new (C) ExpDNode(C, control(), arg)); finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP"); @@ -1688,7 +1688,7 @@ if (!too_many_traps(Deoptimization::Reason_intrinsic)) { // Short form: skip the fancy tests and just check for NaN result. - result = _gvn.transform(new (C) PowDNode(0, x, y)); + result = _gvn.transform(new (C) PowDNode(C, control(), x, y)); } else { // If this inlining ever returned NaN in the past, include all // checks + call to the runtime. @@ -1715,7 +1715,7 @@ Node *complex_path = _gvn.transform( new (C) IfTrueNode(if1) ); // Set fast path result - Node *fast_result = _gvn.transform( new (C) PowDNode(0, x, y) ); + Node *fast_result = _gvn.transform( new (C) PowDNode(C, control(), x, y) ); phi->init_req(3, fast_result); // Complex path @@ -1775,7 +1775,7 @@ // abs(x) Node *absx=_gvn.transform( new (C) AbsDNode(x)); // abs(x)^y - Node *absxpowy = _gvn.transform( new (C) PowDNode(0, absx, y) ); + Node *absxpowy = _gvn.transform( new (C) PowDNode(C, control(), absx, y) ); // -abs(x)^y Node *negabsxpowy = _gvn.transform(new (C) NegDNode (absxpowy)); // (1&(long)y)==1?-DPow(abs(x), y):DPow(abs(x), y) diff -r 1e5e28bac299 -r 8b3da8d14c93 src/share/vm/opto/loopnode.cpp --- a/src/share/vm/opto/loopnode.cpp Mon Feb 11 14:47:04 2013 -0800 +++ b/src/share/vm/opto/loopnode.cpp Tue Feb 12 12:56:11 2013 +0100 @@ -88,9 +88,9 @@ assert( !n->is_Phi() && !n->is_CFG(), "this code only handles data nodes" ); uint i; Node *early; - if( n->in(0) ) { + if (n->in(0) && !n->is_expensive()) { early = n->in(0); - if( !early->is_CFG() ) // Might be a non-CFG multi-def + if (!early->is_CFG()) // Might be a non-CFG multi-def early = get_ctrl(early); // So treat input as a straight data input i = 1; } else { @@ -99,28 +99,28 @@ } uint e_d = dom_depth(early); assert( early, "" ); - for( ; i < n->req(); i++ ) { + for (; i < n->req(); i++) { Node *cin = get_ctrl(n->in(i)); assert( cin, "" ); // Keep deepest dominator depth uint c_d = dom_depth(cin); - if( c_d > e_d ) { // Deeper guy? + if (c_d > e_d) { // Deeper guy? early = cin; // Keep deepest found so far e_d = c_d; - } else if( c_d == e_d && // Same depth? - early != cin ) { // If not equal, must use slower algorithm + } else if (c_d == e_d && // Same depth? + early != cin) { // If not equal, must use slower algorithm // If same depth but not equal, one _must_ dominate the other // and we want the deeper (i.e., dominated) guy. Node *n1 = early; Node *n2 = cin; - while( 1 ) { + while (1) { n1 = idom(n1); // Walk up until break cycle n2 = idom(n2); - if( n1 == cin || // Walked early up to cin - dom_depth(n2) < c_d ) + if (n1 == cin || // Walked early up to cin + dom_depth(n2) < c_d) break; // early is deeper; keep him - if( n2 == early || // Walked cin up to early - dom_depth(n1) < c_d ) { + if (n2 == early || // Walked cin up to early + dom_depth(n1) < c_d) { early = cin; // cin is deeper; keep him break; } @@ -132,9 +132,108 @@ // Return earliest legal location assert(early == find_non_split_ctrl(early), "unexpected early control"); + if (n->is_expensive()) { + assert(n->in(0), "should have control input"); + early = get_early_ctrl_for_expensive(n, early); + } + return early; } +//------------------------------get_early_ctrl_for_expensive--------------------------------- +// Move node up the dominator tree as high as legal while still beneficial +Node *PhaseIdealLoop::get_early_ctrl_for_expensive(Node *n, Node* earliest) { + assert(n->in(0) && n->is_expensive(), "expensive node with control input here"); + assert(OptimizeExpensiveOps, "optimization off?"); + + Node* ctl = n->in(0); + assert(ctl->is_CFG(), "expensive input 0 must be cfg"); + uint min_dom_depth = dom_depth(earliest); +#ifdef ASSERT + if (!is_dominator(ctl, earliest) && !is_dominator(earliest, ctl)) { + dump_bad_graph("Bad graph detected in get_early_ctrl_for_expensive", n, earliest, ctl); + assert(false, "Bad graph detected in get_early_ctrl_for_expensive"); + } +#endif + if (dom_depth(ctl) < min_dom_depth) { + return earliest; + } + + while (1) { + Node *next = ctl; + // Moving the node out of a loop on the projection of a If + // confuses loop predication. So once we hit a Loop in a If branch + // that doesn't branch to an UNC, we stop. The code that process + // expensive nodes will notice the loop and skip over it to try to + // move the node further up. + if (ctl->is_CountedLoop() && ctl->in(1) != NULL && ctl->in(1)->in(0) != NULL && ctl->in(1)->in(0)->is_If()) { + if (!is_uncommon_trap_if_pattern(ctl->in(1)->as_Proj(), Deoptimization::Reason_none)) { + break; + } + next = idom(ctl->in(1)->in(0)); + } else if (ctl->is_Proj()) { + // We only move it up along a projection if the projection is + // the single control projection for its parent: same code path, + // if it's a If with UNC or fallthrough of a call. + Node* parent_ctl = ctl->in(0); + if (parent_ctl == NULL) { + break; + } else if (parent_ctl->is_CountedLoopEnd() && parent_ctl->as_CountedLoopEnd()->loopnode() != NULL) { + next = parent_ctl->as_CountedLoopEnd()->loopnode()->init_control(); + } else if (parent_ctl->is_If()) { + if (!is_uncommon_trap_if_pattern(ctl->as_Proj(), Deoptimization::Reason_none)) { + break; + } + assert(idom(ctl) == parent_ctl, "strange"); + next = idom(parent_ctl); + } else if (ctl->is_CatchProj()) { + if (ctl->as_Proj()->_con != CatchProjNode::fall_through_index) { + break; + } + assert(parent_ctl->in(0)->in(0)->is_Call(), "strange graph"); + next = parent_ctl->in(0)->in(0)->in(0); + } else { + // Check if parent control has a single projection (this + // control is the only possible successor of the parent + // control). If so, we can try to move the node above the + // parent control. + int nb_ctl_proj = 0; + for (DUIterator_Fast imax, i = parent_ctl->fast_outs(imax); i < imax; i++) { + Node *p = parent_ctl->fast_out(i); + if (p->is_Proj() && p->is_CFG()) { + nb_ctl_proj++; + if (nb_ctl_proj > 1) { + break; + } + } + } + + if (nb_ctl_proj > 1) { + break; + } + assert(parent_ctl->is_Start() || parent_ctl->is_MemBar() || parent_ctl->is_Call(), "unexpected node"); + assert(idom(ctl) == parent_ctl, "strange"); + next = idom(parent_ctl); + } + } else { + next = idom(ctl); + } + if (next->is_Root() || next->is_Start() || dom_depth(next) < min_dom_depth) { + break; + } + ctl = next; + } + + if (ctl != n->in(0)) { + _igvn.hash_delete(n); + n->set_req(0, ctl); + _igvn.hash_insert(n); + } + + return ctl; +} + + //------------------------------set_early_ctrl--------------------------------- // Set earliest legal control void PhaseIdealLoop::set_early_ctrl( Node *n ) { @@ -1892,6 +1991,98 @@ } } +//------------------------process_expensive_nodes----------------------------- +// Expensive nodes have their control input set to prevent the GVN +// from commoning them and as a result forcing the resulting node to +// be in a more frequent path. Use CFG information here, to change the +// control inputs so that some expensive nodes can be commoned while +// not executed more frequently. +bool PhaseIdealLoop::process_expensive_nodes() { + assert(OptimizeExpensiveOps, "optimization off?"); + + // Sort nodes to bring similar nodes together + C->sort_expensive_nodes(); + + bool progress = false; + + for (int i = 0; i < C->expensive_count(); ) { + Node* n = C->expensive_node(i); + int start = i; + // Find nodes similar to n + i++; + for (; i < C->expensive_count() && Compile::cmp_expensive_nodes(n, C->expensive_node(i)) == 0; i++); + int end = i; + // And compare them two by two + for (int j = start; j < end; j++) { + Node* n1 = C->expensive_node(j); + if (is_node_unreachable(n1)) { + continue; + } + for (int k = j+1; k < end; k++) { + Node* n2 = C->expensive_node(k); + if (is_node_unreachable(n2)) { + continue; + } + + assert(n1 != n2, "should be pair of nodes"); + + Node* c1 = n1->in(0); + Node* c2 = n2->in(0); + + Node* parent_c1 = c1; + Node* parent_c2 = c2; + + // The call to get_early_ctrl_for_expensive() moves the + // expensive nodes up but stops at loops that are in a if + // branch. See whether we can exit the loop and move above the + // If. + if (c1->is_Loop()) { + parent_c1 = c1->in(1); + } + if (c2->is_Loop()) { + parent_c2 = c2->in(1); + } + + if (parent_c1 == parent_c2) { + _igvn._worklist.push(n1); + _igvn._worklist.push(n2); + continue; + } + + // Look for identical expensive node up the dominator chain. + if (is_dominator(c1, c2)) { + c2 = c1; + } else if (is_dominator(c2, c1)) { + c1 = c2; + } else if (parent_c1->is_Proj() && parent_c1->in(0)->is_If() && + parent_c2->is_Proj() && parent_c1->in(0) == parent_c2->in(0)) { + // Both branches have the same expensive node so move it up + // before the if. + c1 = c2 = idom(parent_c1->in(0)); + } + // Do the actual moves + if (n1->in(0) != c1) { + _igvn.hash_delete(n1); + n1->set_req(0, c1); + _igvn.hash_insert(n1); + _igvn._worklist.push(n1); + progress = true; + } + if (n2->in(0) != c2) { + _igvn.hash_delete(n2); + n2->set_req(0, c2); + _igvn.hash_insert(n2); + _igvn._worklist.push(n2); + progress = true; + } + } + } + } + + return progress; +} + + //============================================================================= //----------------------------build_and_optimize------------------------------- // Create a PhaseLoop. Build the ideal Loop tree. Map each Ideal Node to @@ -1960,7 +2151,9 @@ } // Nothing to do, so get out - if( !C->has_loops() && !skip_loop_opts && !do_split_ifs && !_verify_me && !_verify_only ) { + bool stop_early = !C->has_loops() && !skip_loop_opts && !do_split_ifs && !_verify_me && !_verify_only; + bool do_expensive_nodes = C->should_optimize_expensive_nodes(_igvn); + if (stop_early && !do_expensive_nodes) { _igvn.optimize(); // Cleanup NeverBranches return; } @@ -2058,6 +2251,21 @@ return; } + if (stop_early) { + assert(do_expensive_nodes, "why are we here?"); + if (process_expensive_nodes()) { + // If we made some progress when processing expensive nodes then + // the IGVN may modify the graph in a way that will allow us to + // make some more progress: we need to try processing expensive + // nodes again. + C->set_major_progress(); + } + + _igvn.optimize(); + + return; + } + // Some parser-inserted loop predicates could never be used by loop // predication or they were moved away from loop during some optimizations. // For example, peeling. Eliminate them before next loop optimizations. @@ -2120,6 +2328,10 @@ NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); ); } + if (!C->major_progress() && do_expensive_nodes && process_expensive_nodes()) { + C->set_major_progress(); + } + // Perform loop predication before iteration splitting if (C->has_loops() && !C->major_progress() && (C->predicate_count() > 0)) { _ltree_root->_child->loop_predication(this); @@ -3299,7 +3511,7 @@ #ifdef ASSERT if (legal->is_Start() && !early->is_Root()) { // Bad graph. Print idom path and fail. - dump_bad_graph(n, early, LCA); + dump_bad_graph("Bad graph detected in build_loop_late", n, early, LCA); assert(false, "Bad graph detected in build_loop_late"); } #endif @@ -3350,8 +3562,8 @@ } #ifdef ASSERT -void PhaseIdealLoop::dump_bad_graph(Node* n, Node* early, Node* LCA) { - tty->print_cr( "Bad graph detected in build_loop_late"); +void PhaseIdealLoop::dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA) { + tty->print_cr(msg); tty->print("n: "); n->dump(); tty->print("early(n): "); early->dump(); if (n->in(0) != NULL && !n->in(0)->is_top() && diff -r 1e5e28bac299 -r 8b3da8d14c93 src/share/vm/opto/loopnode.hpp --- a/src/share/vm/opto/loopnode.hpp Mon Feb 11 14:47:04 2013 -0800 +++ b/src/share/vm/opto/loopnode.hpp Tue Feb 12 12:56:11 2013 +0100 @@ -263,9 +263,18 @@ bool stride_is_con() const { Node *tmp = stride (); return (tmp != NULL && tmp->is_Con()); } BoolTest::mask test_trip() const { return in(TestValue)->as_Bool()->_test._test; } CountedLoopNode *loopnode() const { + // The CountedLoopNode that goes with this CountedLoopEndNode may + // have been optimized out by the IGVN so be cautious with the + // pattern matching on the graph + if (phi() == NULL) { + return NULL; + } Node *ln = phi()->in(0); - assert( ln->Opcode() == Op_CountedLoop, "malformed loop" ); - return (CountedLoopNode*)ln; } + if (ln->is_CountedLoop() && ln->as_CountedLoop()->loopexit() == this) { + return (CountedLoopNode*)ln; + } + return NULL; + } #ifndef PRODUCT virtual void dump_spec(outputStream *st) const; @@ -598,6 +607,7 @@ // check if transform created new nodes that need _ctrl recorded Node *get_late_ctrl( Node *n, Node *early ); Node *get_early_ctrl( Node *n ); + Node *get_early_ctrl_for_expensive(Node *n, Node* earliest); void set_early_ctrl( Node *n ); void set_subtree_ctrl( Node *root ); void set_ctrl( Node *n, Node *ctrl ) { @@ -905,6 +915,16 @@ void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1); void eliminate_useless_predicates(); + // Change the control input of expensive nodes to allow commoning by + // IGVN when it is guaranteed to not result in a more frequent + // execution of the expensive node. Return true if progress. + bool process_expensive_nodes(); + + // Check whether node has become unreachable + bool is_node_unreachable(Node *n) const { + return !has_node(n) || n->is_unreachable(_igvn); + } + // Eliminate range-checks and other trip-counter vs loop-invariant tests. void do_range_check( IdealLoopTree *loop, Node_List &old_new ); @@ -1043,7 +1063,7 @@ void register_new_node( Node *n, Node *blk ); #ifdef ASSERT -void dump_bad_graph(Node* n, Node* early, Node* LCA); + void dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA); #endif #ifndef PRODUCT diff -r 1e5e28bac299 -r 8b3da8d14c93 src/share/vm/opto/node.cpp --- a/src/share/vm/opto/node.cpp Mon Feb 11 14:47:04 2013 -0800 +++ b/src/share/vm/opto/node.cpp Tue Feb 12 12:56:11 2013 +0100 @@ -493,6 +493,8 @@ } if (is_macro()) compile->add_macro_node(n); + if (is_expensive()) + compile->add_expensive_node(n); n->set_idx(compile->next_unique()); // Get new unique index as well debug_only( n->verify_construction() ); @@ -616,6 +618,9 @@ if (is_macro()) { compile->remove_macro_node(this); } + if (is_expensive()) { + compile->remove_expensive_node(this); + } #ifdef ASSERT // We will not actually delete the storage, but we'll make the node unusable. *(address*)this = badAddress; // smash the C++ vtbl, probably @@ -689,6 +694,13 @@ } #endif + +//------------------------------is_unreachable--------------------------------- +bool Node::is_unreachable(PhaseIterGVN &igvn) const { + assert(!is_Mach(), "doesn't work with MachNodes"); + return outcnt() == 0 || igvn.type(this) == Type::TOP || in(0)->is_top(); +} + //------------------------------add_req---------------------------------------- // Add a new required input at the end void Node::add_req( Node *n ) { @@ -1246,6 +1258,9 @@ if (dead->is_macro()) { igvn->C->remove_macro_node(dead); } + if (dead->is_expensive()) { + igvn->C->remove_expensive_node(dead); + } // Kill all inputs to the dead guy for (uint i=0; i < dead->req(); i++) { Node *n = dead->in(i); // Get input to dead guy diff -r 1e5e28bac299 -r 8b3da8d14c93 src/share/vm/opto/node.hpp --- a/src/share/vm/opto/node.hpp Mon Feb 11 14:47:04 2013 -0800 +++ b/src/share/vm/opto/node.hpp Tue Feb 12 12:56:11 2013 +0100 @@ -378,6 +378,8 @@ bool is_dead() const; #define is_not_dead(n) ((n) == NULL || !VerifyIterativeGVN || !((n)->is_dead())) #endif + // Check whether node has become unreachable + bool is_unreachable(PhaseIterGVN &igvn) const; // Set a required input edge, also updates corresponding output edge void add_req( Node *n ); // Append a NEW required input @@ -646,7 +648,8 @@ Flag_may_be_short_branch = Flag_is_dead_loop_safe << 1, Flag_avoid_back_to_back = Flag_may_be_short_branch << 1, Flag_has_call = Flag_avoid_back_to_back << 1, - _max_flags = (Flag_has_call << 1) - 1 // allow flags combination + Flag_is_expensive = Flag_has_call << 1, + _max_flags = (Flag_is_expensive << 1) - 1 // allow flags combination }; private: @@ -819,6 +822,8 @@ // The node is a "macro" node which needs to be expanded before matching bool is_macro() const { return (_flags & Flag_is_macro) != 0; } + // The node is expensive: the best control is set during loop opts + bool is_expensive() const { return (_flags & Flag_is_expensive) != 0 && in(0) != NULL; } //----------------- Optimization diff -r 1e5e28bac299 -r 8b3da8d14c93 src/share/vm/opto/phaseX.cpp --- a/src/share/vm/opto/phaseX.cpp Mon Feb 11 14:47:04 2013 -0800 +++ b/src/share/vm/opto/phaseX.cpp Tue Feb 12 12:56:11 2013 +0100 @@ -1203,6 +1203,9 @@ if (dead->is_macro()) { C->remove_macro_node(dead); } + if (dead->is_expensive()) { + C->remove_expensive_node(dead); + } if (recurse) { continue; diff -r 1e5e28bac299 -r 8b3da8d14c93 src/share/vm/opto/subnode.hpp --- a/src/share/vm/opto/subnode.hpp Mon Feb 11 14:47:04 2013 -0800 +++ b/src/share/vm/opto/subnode.hpp Tue Feb 12 12:56:11 2013 +0100 @@ -456,7 +456,10 @@ // Exponentiate a double class ExpDNode : public Node { public: - ExpDNode( Node *c, Node *in1 ) : Node(c, in1) {} + ExpDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) { + init_flags(Flag_is_expensive); + C->add_expensive_node(this); + } virtual int Opcode() const; const Type *bottom_type() const { return Type::DOUBLE; } virtual uint ideal_reg() const { return Op_RegD; } @@ -489,7 +492,10 @@ // Raise a double to a double power class PowDNode : public Node { public: - PowDNode(Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) {} + PowDNode(Compile* C, Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) { + init_flags(Flag_is_expensive); + C->add_expensive_node(this); + } virtual int Opcode() const; const Type *bottom_type() const { return Type::DOUBLE; } virtual uint ideal_reg() const { return Op_RegD; }