diff src/share/vm/opto/gcm.cpp @ 12071:adb9a7d94cb5
8023003: Cleanup the public interface to PhaseCFG
Summary: public methods that don't need to be public should be private.
Reviewed-by: kvn, twisti
| author | adlertz |
| --- | --- |
| date | Fri, 16 Aug 2013 10:23:55 +0200 |
| parents | d1034bd8cefc |
| children | 650868c062a9 e2722a66aba7 |
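
Every hunk below follows one pattern: callers stop reaching into PhaseCFG's raw state (`_blocks`, `_num_blocks`, `_broot`, `_node_latency`) and go through narrow accessors instead (`get_block(i)`, `number_of_blocks()`, `get_root_block()`, `get_latency_for_node(n)`, `set_latency_for_node(n, l)`), so the underlying fields can become private. The following is a minimal standalone sketch of that encapsulation pattern, not the real HotSpot code: the `Block`/`Node` stand-ins and the `std::vector` storage are illustrative assumptions.

```cpp
#include <cassert>
#include <vector>

// Illustrative stand-ins only; the real types live in opto/block.hpp
// and opto/node.hpp and carry far more state.
struct Block { double _freq; };
struct Node  { unsigned _idx; };

class PhaseCFG {
private:
  // Previously public data, now reachable only through accessors.
  std::vector<Block*>   _blocks;        // was indexed directly: _blocks[i]
  std::vector<unsigned> _node_latency;  // was _node_latency->at_grow(idx)

public:
  unsigned number_of_blocks() const { return (unsigned)_blocks.size(); }

  Block* get_block(unsigned i) const {
    assert(i < _blocks.size() && "block index out of range");
    return _blocks[i];
  }

  // get_root_block() replaces direct reads of the _broot field.
  Block* get_root_block() const { return get_block(0); }

  // Growable read: unseen indices report latency 0, like at_grow().
  unsigned get_latency_for_node(const Node* n) const {
    return n->_idx < _node_latency.size() ? _node_latency[n->_idx] : 0;
  }

  // Growable write, like at_put_grow().
  void set_latency_for_node(const Node* n, unsigned latency) {
    if (n->_idx >= _node_latency.size()) {
      _node_latency.resize(n->_idx + 1, 0);
    }
    _node_latency[n->_idx] = latency;
  }
};
```

With the accessors in place, loops of the form `for (uint i = 0; i < _num_blocks; i++) { _blocks[i]->dump(); }` become `for (uint i = 0; i < number_of_blocks(); i++) { get_block(i)->dump(); }`, which is the shape of most hunks in this change.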
--- a/src/share/vm/opto/gcm.cpp	Thu Aug 15 11:59:19 2013 -0700
+++ b/src/share/vm/opto/gcm.cpp	Fri Aug 16 10:23:55 2013 +0200
@@ -121,27 +121,30 @@
 
 //------------------------------schedule_pinned_nodes--------------------------
 // Set the basic block for Nodes pinned into blocks
-void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) {
+void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) {
   // Allocate node stack of size C->unique()+8 to avoid frequent realloc
-  GrowableArray <Node *> spstack(C->unique()+8);
+  GrowableArray <Node *> spstack(C->unique() + 8);
   spstack.push(_root);
-  while ( spstack.is_nonempty() ) {
-    Node *n = spstack.pop();
-    if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
-      if( n->pinned() && !has_block(n)) {  // Pinned?  Nail it down!
-        assert( n->in(0), "pinned Node must have Control" );
+  while (spstack.is_nonempty()) {
+    Node* node = spstack.pop();
+    if (!visited.test_set(node->_idx)) { // Test node and flag it as visited
+      if (node->pinned() && !has_block(node)) { // Pinned? Nail it down!
+        assert(node->in(0), "pinned Node must have Control");
         // Before setting block replace block_proj control edge
-        replace_block_proj_ctrl(n);
-        Node *input = n->in(0);
+        replace_block_proj_ctrl(node);
+        Node* input = node->in(0);
         while (!input->is_block_start()) {
           input = input->in(0);
         }
-        Block *b = get_block_for_node(input); // Basic block of controlling input
-        schedule_node_into_block(n, b);
+        Block* block = get_block_for_node(input); // Basic block of controlling input
+        schedule_node_into_block(node, block);
       }
-      for( int i = n->req() - 1; i >= 0; --i ) {  // For all inputs
-        if( n->in(i) != NULL )
-          spstack.push(n->in(i));
+
+      // process all inputs that are non NULL
+      for (int i = node->req() - 1; i >= 0; --i) {
+        if (node->in(i) != NULL) {
+          spstack.push(node->in(i));
+        }
       }
     }
   }
@@ -205,32 +208,29 @@
 // which all their inputs occur.
 bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
   // Allocate stack with enough space to avoid frequent realloc
-  Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats
-  // roots.push(_root); _root will be processed among C->top() inputs
+  Node_Stack nstack(roots.Size() + 8);
+  // _root will be processed among C->top() inputs
   roots.push(C->top());
   visited.set(C->top()->_idx);
   while (roots.size() != 0) {
     // Use local variables nstack_top_n & nstack_top_i to cache values
     // on stack's top.
-    Node *nstack_top_n = roots.pop();
-    uint  nstack_top_i = 0;
-//while_nstack_nonempty:
+    Node* parent_node = roots.pop();
+    uint input_index = 0;
+
     while (true) {
-      // Get parent node and next input's index from stack's top.
-      Node *n = nstack_top_n;
-      uint  i = nstack_top_i;
-
-      if (i == 0) {
+      if (input_index == 0) {
         // Fixup some control.  Constants without control get attached
         // to root and nodes that use is_block_proj() nodes should be attached
         // to the region that starts their block.
-        const Node *in0 = n->in(0);
-        if (in0 != NULL) {              // Control-dependent?
-          replace_block_proj_ctrl(n);
-        } else {               // n->in(0) == NULL
-          if (n->req() == 1) { // This guy is a constant with NO inputs?
-            n->set_req(0, _root);
+        const Node* control_input = parent_node->in(0);
+        if (control_input != NULL) {
+          replace_block_proj_ctrl(parent_node);
+        } else {
+          // Is a constant with NO inputs?
+          if (parent_node->req() == 1) {
+            parent_node->set_req(0, _root);
           }
         }
       }
 
@@ -239,37 +239,47 @@
       // input is already in a block we quit following inputs (to avoid
      // cycles). Instead we put that Node on a worklist to be handled
       // later (since IT'S inputs may not have a block yet).
-      bool done = true;       // Assume all n's inputs will be processed
-      while (i < n->len()) {  // For all inputs
-        Node *in = n->in(i);  // Get input
-        ++i;
-        if (in == NULL) continue;  // Ignore NULL, missing inputs
+
+      // Assume all n's inputs will be processed
+      bool done = true;
+
+      while (input_index < parent_node->len()) {
+        Node* in = parent_node->in(input_index++);
+        if (in == NULL) {
+          continue;
+        }
+
         int is_visited = visited.test_set(in->_idx);
-        if (!has_block(in)) {  // Missing block selection?
+        if (!has_block(in)) {
          if (is_visited) {
-            // assert( !visited.test(in->_idx), "did not schedule early" );
            return false;
          }
-          nstack.push(n, i);   // Save parent node and next input's index.
-          nstack_top_n = in;   // Process current input now.
-          nstack_top_i = 0;
-          done = false;        // Not all n's inputs processed.
-          break; // continue while_nstack_nonempty;
-        } else if (!is_visited) {  // Input not yet visited?
-          roots.push(in);      // Visit this guy later, using worklist
+          // Save parent node and next input's index.
+          nstack.push(parent_node, input_index);
+          // Process current input now.
+          parent_node = in;
+          input_index = 0;
+          // Not all n's inputs processed.
+          done = false;
+          break;
+        } else if (!is_visited) {
+          // Visit this guy later, using worklist
+          roots.push(in);
        }
      }
+
      if (done) {
        // All of n's inputs have been processed, complete post-processing.
 
        // Some instructions are pinned into a block.  These include Region,
        // Phi, Start, Return, and other control-dependent instructions and
        // any projections which depend on them.
-        if (!n->pinned()) {
+        if (!parent_node->pinned()) {
          // Set earliest legal block.
-          map_node_to_block(n, find_deepest_input(n, this));
+          Block* earliest_block = find_deepest_input(parent_node, this);
+          map_node_to_block(parent_node, earliest_block);
        } else {
-          assert(get_block_for_node(n) == get_block_for_node(n->in(0)), "Pinned Node should be at the same block as its control edge");
+          assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge");
        }
 
        if (nstack.is_empty()) {
@@ -278,12 +288,12 @@
          break;
        }
        // Get saved parent node and next input's index.
-        nstack_top_n = nstack.node();
-        nstack_top_i = nstack.index();
+        parent_node = nstack.node();
+        input_index = nstack.index();
        nstack.pop();
-      } // if (done)
-    } // while (true)
-  } // while (roots.size() != 0)
+      }
+    }
+  }
 
  return true;
 }
@@ -847,7 +857,7 @@
 
 //------------------------------ComputeLatenciesBackwards----------------------
 // Compute the latency of all the instructions.
-void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) {
+void PhaseCFG::compute_latencies_backwards(VectorSet &visited, Node_List &stack) {
 #ifndef PRODUCT
  if (trace_opto_pipelining())
    tty->print("\n#---- ComputeLatenciesBackwards ----\n");
@@ -870,31 +880,34 @@
  // Set the latency for this instruction
 #ifndef PRODUCT
  if (trace_opto_pipelining()) {
-    tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
-               n->_idx, _node_latency->at_grow(n->_idx));
+    tty->print("# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
    dump();
  }
 #endif
 
-  if (n->is_Proj())
+  if (n->is_Proj()) {
    n = n->in(0);
+  }
 
-  if (n->is_Root())
+  if (n->is_Root()) {
    return;
+  }
 
  uint nlen = n->len();
-  uint use_latency = _node_latency->at_grow(n->_idx);
+  uint use_latency = get_latency_for_node(n);
  uint use_pre_order = get_block_for_node(n)->_pre_order;
 
-  for ( uint j=0; j<nlen; j++ ) {
+  for (uint j = 0; j < nlen; j++) {
    Node *def = n->in(j);
 
-    if (!def || def == n)
+    if (!def || def == n) {
      continue;
+    }
 
    // Walk backwards thru projections
-    if (def->is_Proj())
+    if (def->is_Proj()) {
      def = def->in(0);
+    }
 
 #ifndef PRODUCT
    if (trace_opto_pipelining()) {
@@ -907,22 +920,20 @@
    Block *def_block = get_block_for_node(def);
    uint def_pre_order = def_block ? def_block->_pre_order : 0;
 
-    if ( (use_pre_order <  def_pre_order) ||
-         (use_pre_order == def_pre_order && n->is_Phi()) )
+    if ((use_pre_order < def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) {
      continue;
+    }
 
    uint delta_latency = n->latency(j);
    uint current_latency = delta_latency + use_latency;
 
-    if (_node_latency->at_grow(def->_idx) < current_latency) {
-      _node_latency->at_put_grow(def->_idx, current_latency);
+    if (get_latency_for_node(def) < current_latency) {
+      set_latency_for_node(def, current_latency);
    }
 
 #ifndef PRODUCT
    if (trace_opto_pipelining()) {
-      tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
-                    use_latency, j, delta_latency, current_latency, def->_idx,
-                    _node_latency->at_grow(def->_idx));
+      tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def));
    }
 #endif
  }
@@ -957,7 +968,7 @@
    return 0;
 
  uint nlen = use->len();
-  uint nl = _node_latency->at_grow(use->_idx);
+  uint nl = get_latency_for_node(use);
 
  for ( uint j=0; j<nlen; j++ ) {
    if (use->in(j) == n) {
@@ -992,8 +1003,7 @@
  // Set the latency for this instruction
 #ifndef PRODUCT
  if (trace_opto_pipelining()) {
-    tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
-               n->_idx, _node_latency->at_grow(n->_idx));
+    tty->print("# latency_from_outputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
    dump();
  }
 #endif
@@ -1006,7 +1016,7 @@
    if (latency < l) latency = l;
  }
 
-  _node_latency->at_put_grow(n->_idx, latency);
+  set_latency_for_node(n, latency);
 }
 
 //------------------------------hoist_to_cheaper_block-------------------------
@@ -1016,9 +1026,9 @@
  const double delta = 1+PROB_UNLIKELY_MAG(4);
  Block* least       = LCA;
  double least_freq  = least->_freq;
-  uint target        = _node_latency->at_grow(self->_idx);
-  uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx);
-  uint end_latency   = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
+  uint target        = get_latency_for_node(self);
+  uint start_latency = get_latency_for_node(LCA->_nodes[0]);
+  uint end_latency   = get_latency_for_node(LCA->_nodes[LCA->end_idx()]);
  bool in_latency    = (target <= start_latency);
  const Block* root_block = get_block_for_node(_root);
 
@@ -1035,8 +1045,7 @@
 
 #ifndef PRODUCT
  if (trace_opto_pipelining()) {
-    tty->print("# Find cheaper block for latency %d: ",
-      _node_latency->at_grow(self->_idx));
+    tty->print("# Find cheaper block for latency %d: ", get_latency_for_node(self));
    self->dump();
    tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
      LCA->_pre_order,
@@ -1065,9 +1074,9 @@
    if (mach && LCA == root_block)
      break;
 
-    uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx);
+    uint start_lat = get_latency_for_node(LCA->_nodes[0]);
    uint end_idx   = LCA->end_idx();
-    uint end_lat   = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx);
+    uint end_lat   = get_latency_for_node(LCA->_nodes[end_idx]);
    double LCA_freq = LCA->_freq;
 #ifndef PRODUCT
    if (trace_opto_pipelining()) {
@@ -1109,7 +1118,7 @@
      tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
    }
 #endif
-    _node_latency->at_put_grow(self->_idx, end_latency);
+    set_latency_for_node(self, end_latency);
    partial_latency_of_defs(self);
  }
 
@@ -1255,7 +1264,7 @@
 
 } // end ScheduleLate
 
 //------------------------------GlobalCodeMotion-------------------------------
-void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) {
+void PhaseCFG::global_code_motion() {
  ResourceMark rm;
 #ifndef PRODUCT
@@ -1265,21 +1274,22 @@
 #endif
 
  // Initialize the node to block mapping for things on the proj_list
-  for (uint i = 0; i < proj_list.size(); i++) {
-    unmap_node_from_block(proj_list[i]);
+  for (uint i = 0; i < _matcher.number_of_projections(); i++) {
+    unmap_node_from_block(_matcher.get_projection(i));
  }
 
  // Set the basic block for Nodes pinned into blocks
-  Arena *a = Thread::current()->resource_area();
-  VectorSet visited(a);
-  schedule_pinned_nodes( visited );
+  Arena* arena = Thread::current()->resource_area();
+  VectorSet visited(arena);
+  schedule_pinned_nodes(visited);
 
  // Find the earliest Block any instruction can be placed in.  Some
  // instructions are pinned into Blocks.  Unpinned instructions can
  // appear in last block in which all their inputs occur.
  visited.Clear();
-  Node_List stack(a);
-  stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list
+  Node_List stack(arena);
+  // Pre-grow the list
+  stack.map((C->unique() >> 1) + 16, NULL);
  if (!schedule_early(visited, stack)) {
    // Bailout without retry
    C->record_method_not_compilable("early schedule failed");
@@ -1287,29 +1297,25 @@
  }
 
  // Build Def-Use edges.
-  proj_list.push(_root);        // Add real root as another root
-  proj_list.pop();
-
  // Compute the latency information (via backwards walk) for all the
  // instructions in the graph
  _node_latency = new GrowableArray<uint>(); // resource_area allocation
 
-  if( C->do_scheduling() )
-    ComputeLatenciesBackwards(visited, stack);
+  if (C->do_scheduling()) {
+    compute_latencies_backwards(visited, stack);
+  }
 
  // Now schedule all codes as LATE as possible.  This is the LCA in the
  // dominator tree of all USES of a value.  Pick the block with the least
  // loop nesting depth that is lowest in the dominator tree.
  // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() )
  schedule_late(visited, stack);
-  if( C->failing() ) {
+  if (C->failing()) {
    // schedule_late fails only when graph is incorrect.
    assert(!VerifyGraphEdges, "verification should have failed");
    return;
  }
 
-  unique = C->unique();
-
 #ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print("\n---- Detect implicit null checks ----\n");
@@ -1332,10 +1338,11 @@
    // By reversing the loop direction we get a very minor gain on mpegaudio.
    // Feel free to revert to a forward loop for clarity.
    // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) {
-    for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) {
-      Node *proj = matcher._null_check_tests[i  ];
-      Node *val  = matcher._null_check_tests[i+1];
-      get_block_for_node(proj)->implicit_null_check(this, proj, val, allowed_reasons);
+    for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) {
+      Node* proj = _matcher._null_check_tests[i];
+      Node* val = _matcher._null_check_tests[i + 1];
+      Block* block = get_block_for_node(proj);
+      block->implicit_null_check(this, proj, val, allowed_reasons);
      // The implicit_null_check will only perform the transformation
      // if the null branch is truly uncommon, *and* it leads to an
      // uncommon trap.  Combined with the too_many_traps guards
@@ -1352,11 +1359,11 @@
 
  // Schedule locally.  Right now a simple topological sort.
  // Later, do a real latency aware scheduler.
-  uint max_idx = C->unique();
-  GrowableArray<int> ready_cnt(max_idx, max_idx, -1);
+  GrowableArray<int> ready_cnt(C->unique(), C->unique(), -1);
  visited.Clear();
-  for (uint i = 0; i < _num_blocks; i++) {
-    if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) {
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    if (!block->schedule_local(this, _matcher, ready_cnt, visited)) {
      if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
        C->record_method_not_compilable("local schedule failed");
      }
@@ -1366,15 +1373,17 @@
 
  // If we inserted any instructions between a Call and his CatchNode,
  // clone the instructions on all paths below the Catch.
-  for (uint i = 0; i < _num_blocks; i++) {
-    _blocks[i]->call_catch_cleanup(this, C);
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    block->call_catch_cleanup(this, C);
  }
 
 #ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print("\n---- After GlobalCodeMotion ----\n");
-    for (uint i = 0; i < _num_blocks; i++) {
-      _blocks[i]->dump();
+    for (uint i = 0; i < number_of_blocks(); i++) {
+      Block* block = get_block(i);
+      block->dump();
    }
  }
 #endif
@@ -1382,10 +1391,29 @@
 
  _node_latency = (GrowableArray<uint> *)0xdeadbeef;
 }
 
+bool PhaseCFG::do_global_code_motion() {
+
+  build_dominator_tree();
+  if (C->failing()) {
+    return false;
+  }
+
+  NOT_PRODUCT( C->verify_graph_edges(); )
+
+  estimate_block_frequency();
+
+  global_code_motion();
+
+  if (C->failing()) {
+    return false;
+  }
+
+  return true;
+}
 //------------------------------Estimate_Block_Frequency-----------------------
 // Estimate block frequencies based on IfNode probabilities.
-void PhaseCFG::Estimate_Block_Frequency() {
+void PhaseCFG::estimate_block_frequency() {
 
  // Force conditional branches leading to uncommon traps to be unlikely,
  // not because we get to the uncommon_trap with less relative frequency,
@@ -1393,7 +1421,7 @@
  // there once.
  if (C->do_freq_based_layout()) {
    Block_List worklist;
-    Block* root_blk = _blocks[0];
+    Block* root_blk = get_block(0);
    for (uint i = 1; i < root_blk->num_preds(); i++) {
      Block *pb = get_block_for_node(root_blk->pred(i));
      if (pb->has_uncommon_code()) {
@@ -1402,7 +1430,9 @@
    }
    while (worklist.size() > 0) {
      Block* uct = worklist.pop();
-      if (uct == _broot) continue;
+      if (uct == get_root_block()) {
+        continue;
+      }
      for (uint i = 1; i < uct->num_preds(); i++) {
        Block *pb = get_block_for_node(uct->pred(i));
        if (pb->_num_succs == 1) {
@@ -1426,12 +1456,12 @@
  _root_loop->scale_freq();
 
  // Save outmost loop frequency for LRG frequency threshold
-  _outer_loop_freq = _root_loop->outer_loop_freq();
+  _outer_loop_frequency = _root_loop->outer_loop_freq();
 
  // force paths ending at uncommon traps to be infrequent
  if (!C->do_freq_based_layout()) {
    Block_List worklist;
-    Block* root_blk = _blocks[0];
+    Block* root_blk = get_block(0);
    for (uint i = 1; i < root_blk->num_preds(); i++) {
      Block *pb = get_block_for_node(root_blk->pred(i));
      if (pb->has_uncommon_code()) {
@@ -1451,8 +1481,8 @@
  }
 
 #ifdef ASSERT
-  for (uint i = 0; i < _num_blocks; i++ ) {
-    Block *b = _blocks[i];
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* b = get_block(i);
    assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency");
  }
 #endif
@@ -1476,16 +1506,16 @@
 CFGLoop* PhaseCFG::create_loop_tree() {
 
 #ifdef ASSERT
-  assert( _blocks[0] == _broot, "" );
-  for (uint i = 0; i < _num_blocks; i++ ) {
-    Block *b = _blocks[i];
+  assert(get_block(0) == get_root_block(), "first block should be root block");
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
    // Check that _loop field are clear...we could clear them if not.
-    assert(b->_loop == NULL, "clear _loop expected");
+    assert(block->_loop == NULL, "clear _loop expected");
    // Sanity check that the RPO numbering is reflected in the _blocks array.
    // It doesn't have to be for the loop tree to be built, but if it is not,
    // then the blocks have been reordered since dom graph building...which
    // may question the RPO numbering
-    assert(b->_rpo == i, "unexpected reverse post order number");
+    assert(block->_rpo == i, "unexpected reverse post order number");
  }
 #endif
@@ -1495,11 +1525,11 @@
  Block_List worklist;
 
  // Assign blocks to loops
-  for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block
-    Block *b = _blocks[i];
+  for(uint i = number_of_blocks() - 1; i > 0; i-- ) { // skip Root block
+    Block* block = get_block(i);
 
-    if (b->head()->is_Loop()) {
-      Block* loop_head = b;
+    if (block->head()->is_Loop()) {
+      Block* loop_head = block;
      assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
      Node* tail_n = loop_head->pred(LoopNode::LoopBackControl);
      Block* tail = get_block_for_node(tail_n);
@@ -1533,23 +1563,23 @@
 
  // Create a member list for each loop consisting
  // of both blocks and (immediate child) loops.
-  for (uint i = 0; i < _num_blocks; i++) {
-    Block *b = _blocks[i];
-    CFGLoop* lp = b->_loop;
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    CFGLoop* lp = block->_loop;
    if (lp == NULL) {
      // Not assigned to a loop. Add it to the method's pseudo loop.
-      b->_loop = root_loop;
+      block->_loop = root_loop;
      lp = root_loop;
    }
-    if (lp == root_loop || b != lp->head()) { // loop heads are already members
-      lp->add_member(b);
+    if (lp == root_loop || block != lp->head()) { // loop heads are already members
+      lp->add_member(block);
    }
    if (lp != root_loop) {
      if (lp->parent() == NULL) {
        // Not a nested loop. Make it a child of the method's pseudo loop.
        root_loop->add_nested_loop(lp);
      }
-      if (b == lp->head()) {
+      if (block == lp->head()) {
        // Add nested loop to member list of parent loop.
        lp->parent()->add_member(lp);
      }
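
The largest hunks above, in `schedule_early`, look like a rewrite but mostly rename the same explicit-stack depth-first walk: the cached stack-top pair `nstack_top_n`/`nstack_top_i` becomes `parent_node`/`input_index`, and single-statement branches gain braces. A minimal sketch of that iteration pattern follows, using simplified stand-in types rather than the real `Node`/`Node_Stack` API:

```cpp
#include <cstddef>
#include <utility>
#include <vector>

// Simplified stand-in: each node just points at its inputs.
struct Node {
  std::vector<Node*> inputs;
  bool visited = false;
};

// Post-order walk without recursion: the stack stores (parent, next input
// index), mirroring how schedule_early caches parent_node / input_index.
void walk_inputs_postorder(Node* root, std::vector<Node*>& postorder) {
  std::vector<std::pair<Node*, std::size_t>> nstack;
  Node* parent_node = root;
  std::size_t input_index = 0;
  parent_node->visited = true;

  while (true) {
    bool done = true;  // assume all of parent_node's inputs are processed
    while (input_index < parent_node->inputs.size()) {
      Node* in = parent_node->inputs[input_index++];
      if (in == nullptr || in->visited) continue;
      in->visited = true;
      // Save parent node and next input's index, then descend into 'in'.
      nstack.emplace_back(parent_node, input_index);
      parent_node = in;
      input_index = 0;
      done = false;
      break;
    }
    if (done) {
      // All inputs processed: post-process, then pop the saved frame.
      postorder.push_back(parent_node);
      if (nstack.empty()) break;
      parent_node = nstack.back().first;
      input_index = nstack.back().second;
      nstack.pop_back();
    }
  }
}
```

The real `schedule_early` additionally parks inputs that already have a block on the `roots` worklist instead of descending into them, and returns false when it re-reaches a visited node that still has no block, which is how it detects cycles.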