comparison src/share/vm/opto/gcm.cpp @ 12023:d1034bd8cefc

8022284: Hide internal data structure in PhaseCFG
Summary: Hide private node to block mapping using public interface
Reviewed-by: kvn, roland
author adlertz
date Wed, 07 Aug 2013 17:56:19 +0200
parents 571076d3c79d
children adb9a7d94cb5
comparison: 12004:71526a36ebb4 vs 12023:d1034bd8cefc
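The new calls used throughout this diff (map_node_to_block, get_block_for_node, has_block, unmap_node_from_block) replace direct reads and writes of the formerly public _bbs Block_Array. Below is a minimal sketch of the accessor shape those call sites imply. It assumes the surrounding opto types (Node, Block, Block_Array); the member name _node_to_block_mapping and the exact signatures are illustrative guesses inferred from this file only, not the actual declarations in block.hpp.

// Hypothetical sketch of the PhaseCFG accessors this changeset migrates to.
// Only the four operations visible in this diff are shown; field and
// parameter names are assumptions made for illustration.
class PhaseCFG {
 private:
  Block_Array _node_to_block_mapping;   // was the public '_bbs' field

 public:
  // Return the block a node has been placed in, or NULL if unmapped.
  Block* get_block_for_node(const Node* node) const {
    return _node_to_block_mapping[node->_idx];
  }

  // Record that 'node' now lives in block 'b'.
  void map_node_to_block(Node* node, Block* b) {
    _node_to_block_mapping.map(node->_idx, b);
  }

  // Forget any placement recorded for 'node'.
  void unmap_node_from_block(Node* node) {
    _node_to_block_mapping.map(node->_idx, NULL);
  }

  // True once a block has been chosen for 'node'.
  bool has_block(const Node* node) const {
    return (_node_to_block_mapping.lookup(node->_idx) != NULL);
  }
};

With accessors of this shape, a call site such as "Block* buse = _bbs[use->_idx];" becomes "Block* buse = get_block_for_node(use);", which is the pattern repeated throughout the hunks below.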
--- a/src/share/vm/opto/gcm.cpp
+++ b/src/share/vm/opto/gcm.cpp
@@ -64,24 +64,25 @@
 //----------------------------schedule_node_into_block-------------------------
 // Insert node n into block b. Look for projections of n and make sure they
 // are in b also.
 void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) {
   // Set basic block of n, Add n to b,
-  _bbs.map(n->_idx, b);
+  map_node_to_block(n, b);
   b->add_inst(n);

   // After Matching, nearly any old Node may have projections trailing it.
   // These are usually machine-dependent flags. In any case, they might
   // float to another block below this one. Move them up.
   for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
     Node* use = n->fast_out(i);
     if (use->is_Proj()) {
-      Block* buse = _bbs[use->_idx];
+      Block* buse = get_block_for_node(use);
       if (buse != b) { // In wrong block?
-        if (buse != NULL)
+        if (buse != NULL) {
           buse->find_remove(use); // Remove from wrong block
-        _bbs.map(use->_idx, b); // Re-insert in this block
+        }
+        map_node_to_block(use, b);
         b->add_inst(use);
       }
     }
   }
 }
@@ -95,11 +96,11 @@
   assert(in0 != NULL, "Only control-dependent");
   const Node *p = in0->is_block_proj();
   if (p != NULL && p != n) { // Control from a block projection?
     assert(!n->pinned() || n->is_MachConstantBase(), "only pinned MachConstantBase node is expected here");
     // Find trailing Region
-    Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block
+    Block *pb = get_block_for_node(in0); // Block-projection already has basic block
     uint j = 0;
     if (pb->_num_succs != 1) { // More then 1 successor?
       // Search for successor
       uint max = pb->_nodes.size();
       assert( max > 1, "" );
@@ -125,18 +126,19 @@
   GrowableArray <Node *> spstack(C->unique()+8);
   spstack.push(_root);
   while ( spstack.is_nonempty() ) {
     Node *n = spstack.pop();
     if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
-      if( n->pinned() && !_bbs.lookup(n->_idx) ) { // Pinned? Nail it down!
+      if( n->pinned() && !has_block(n)) { // Pinned? Nail it down!
         assert( n->in(0), "pinned Node must have Control" );
         // Before setting block replace block_proj control edge
         replace_block_proj_ctrl(n);
         Node *input = n->in(0);
-        while( !input->is_block_start() )
+        while (!input->is_block_start()) {
           input = input->in(0);
-        Block *b = _bbs[input->_idx]; // Basic block of controlling input
+        }
+        Block *b = get_block_for_node(input); // Basic block of controlling input
         schedule_node_into_block(n, b);
       }
       for( int i = n->req() - 1; i >= 0; --i ) { // For all inputs
         if( n->in(i) != NULL )
           spstack.push(n->in(i));
@@ -147,11 +149,11 @@

 #ifdef ASSERT
 // Assert that new input b2 is dominated by all previous inputs.
 // Check this by by seeing that it is dominated by b1, the deepest
 // input observed until b2.
-static void assert_dom(Block* b1, Block* b2, Node* n, Block_Array &bbs) {
+static void assert_dom(Block* b1, Block* b2, Node* n, const PhaseCFG* cfg) {
   if (b1 == NULL) return;
   assert(b1->_dom_depth < b2->_dom_depth, "sanity");
   Block* tmp = b2;
   while (tmp != b1 && tmp != NULL) {
     tmp = tmp->_idom;
@@ -160,11 +162,11 @@
     // Detected an unschedulable graph. Print some nice stuff and die.
     tty->print_cr("!!! Unschedulable graph !!!");
     for (uint j=0; j<n->len(); j++) { // For all inputs
       Node* inn = n->in(j); // Get input
       if (inn == NULL) continue; // Ignore NULL, missing inputs
-      Block* inb = bbs[inn->_idx];
+      Block* inb = cfg->get_block_for_node(inn);
       tty->print("B%d idom=B%d depth=%2d ",inb->_pre_order,
           inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth);
       inn->dump();
     }
     tty->print("Failing node: ");
@@ -172,24 +174,24 @@
     assert(false, "unscheduable graph");
   }
 }
 #endif

-static Block* find_deepest_input(Node* n, Block_Array &bbs) {
+static Block* find_deepest_input(Node* n, const PhaseCFG* cfg) {
   // Find the last input dominated by all other inputs.
   Block* deepb = NULL; // Deepest block so far
   int deepb_dom_depth = 0;
   for (uint k = 0; k < n->len(); k++) { // For all inputs
     Node* inn = n->in(k); // Get input
     if (inn == NULL) continue; // Ignore NULL, missing inputs
-    Block* inb = bbs[inn->_idx];
+    Block* inb = cfg->get_block_for_node(inn);
     assert(inb != NULL, "must already have scheduled this input");
     if (deepb_dom_depth < (int) inb->_dom_depth) {
       // The new inb must be dominated by the previous deepb.
       // The various inputs must be linearly ordered in the dom
       // tree, or else there will not be a unique deepest block.
-      DEBUG_ONLY(assert_dom(deepb, inb, n, bbs));
+      DEBUG_ONLY(assert_dom(deepb, inb, n, cfg));
       deepb = inb; // Save deepest block
       deepb_dom_depth = deepb->_dom_depth;
     }
   }
   assert(deepb != NULL, "must be at least one input to n");
@@ -241,11 +243,11 @@
     while (i < n->len()) { // For all inputs
       Node *in = n->in(i); // Get input
       ++i;
       if (in == NULL) continue; // Ignore NULL, missing inputs
       int is_visited = visited.test_set(in->_idx);
-      if (!_bbs.lookup(in->_idx)) { // Missing block selection?
+      if (!has_block(in)) { // Missing block selection?
         if (is_visited) {
           // assert( !visited.test(in->_idx), "did not schedule early" );
           return false;
         }
         nstack.push(n, i); // Save parent node and next input's index.
@@ -263,13 +265,13 @@
       // Some instructions are pinned into a block. These include Region,
       // Phi, Start, Return, and other control-dependent instructions and
       // any projections which depend on them.
       if (!n->pinned()) {
         // Set earliest legal block.
-        _bbs.map(n->_idx, find_deepest_input(n, _bbs));
+        map_node_to_block(n, find_deepest_input(n, this));
       } else {
-        assert(_bbs[n->_idx] == _bbs[n->in(0)->_idx], "Pinned Node should be at the same block as its control edge");
+        assert(get_block_for_node(n) == get_block_for_node(n->in(0)), "Pinned Node should be at the same block as its control edge");
       }

       if (nstack.is_empty()) {
         // Finished all nodes on stack.
         // Process next node on the worklist 'roots'.
@@ -311,12 +313,12 @@
 //--------------------------raise_LCA_above_use--------------------------------
 // We are placing a definition, and have been given a def->use edge.
 // The definition must dominate the use, so move the LCA upward in the
 // dominator tree to dominate the use. If the use is a phi, adjust
 // the LCA only with the phi input paths which actually use this def.
-static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, Block_Array &bbs) {
-  Block* buse = bbs[use->_idx];
+static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, const PhaseCFG* cfg) {
+  Block* buse = cfg->get_block_for_node(use);
   if (buse == NULL) return LCA; // Unused killing Projs have no use block
   if (!use->is_Phi()) return buse->dom_lca(LCA);
   uint pmax = use->req(); // Number of Phi inputs
   // Why does not this loop just break after finding the matching input to
   // the Phi? Well...it's like this. I do not have true def-use/use-def
@@ -327,11 +329,11 @@
   // which use-def edge I should find the predecessor block for. So I find
   // them all. Means I do a little extra work if a Phi uses the same value
   // more than once.
   for (uint j=1; j<pmax; j++) { // For all inputs
     if (use->in(j) == def) { // Found matching input?
-      Block* pred = bbs[buse->pred(j)->_idx];
+      Block* pred = cfg->get_block_for_node(buse->pred(j));
       LCA = pred->dom_lca(LCA);
     }
   }
   return LCA;
 }
@@ -340,12 +342,11 @@
 // Return a new LCA that dominates LCA and any of its marked predecessors.
 // Search all my parents up to 'early' (exclusive), looking for predecessors
 // which are marked with the given index. Return the LCA (in the dom tree)
 // of all marked blocks. If there are none marked, return the original
 // LCA.
-static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark,
-                                    Block* early, Block_Array &bbs) {
+static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark, Block* early, const PhaseCFG* cfg) {
   Block_List worklist;
   worklist.push(LCA);
   while (worklist.size() > 0) {
     Block* mid = worklist.pop();
     if (mid == early) continue; // stop searching here
@@ -364,11 +365,11 @@
       if (LCA == mid)
         continue; // Don't mark as visited to avoid early termination.
     } else {
       // Keep searching through this block's predecessors.
       for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) {
-        Block* mid_parent = bbs[ mid->pred(j)->_idx ];
+        Block* mid_parent = cfg->get_block_for_node(mid->pred(j));
         worklist.push(mid_parent);
       }
     }
     mid->set_raise_LCA_visited(mark);
   }
@@ -382,11 +383,11 @@
 //
 // Because a subset of edges are considered, the resulting block will
 // be earlier (at a shallower dom_depth) than the true schedule_early
 // point of the node. We compute this earlier block as a more permissive
 // site for anti-dependency insertion, but only if subsume_loads is enabled.
-static Block* memory_early_block(Node* load, Block* early, Block_Array &bbs) {
+static Block* memory_early_block(Node* load, Block* early, const PhaseCFG* cfg) {
   Node* base;
   Node* index;
   Node* store = load->in(MemNode::Memory);
   load->as_Mach()->memory_inputs(base, index);

@@ -410,16 +411,16 @@
   if (load->in(0) != NULL) mem_inputs[mem_inputs_length++] = load->in(0);

   Block* deepb = NULL; // Deepest block so far
   int deepb_dom_depth = 0;
   for (int i = 0; i < mem_inputs_length; i++) {
-    Block* inb = bbs[mem_inputs[i]->_idx];
+    Block* inb = cfg->get_block_for_node(mem_inputs[i]);
     if (deepb_dom_depth < (int) inb->_dom_depth) {
       // The new inb must be dominated by the previous deepb.
       // The various inputs must be linearly ordered in the dom
       // tree, or else there will not be a unique deepest block.
-      DEBUG_ONLY(assert_dom(deepb, inb, load, bbs));
+      DEBUG_ONLY(assert_dom(deepb, inb, load, cfg));
       deepb = inb; // Save deepest block
       deepb_dom_depth = deepb->_dom_depth;
     }
   }
   early = deepb;
@@ -486,18 +487,18 @@
   // Note the earliest legal placement of 'load', as determined by
   // by the unique point in the dom tree where all memory effects
   // and other inputs are first available. (Computed by schedule_early.)
   // For normal loads, 'early' is the shallowest place (dom graph wise)
   // to look for anti-deps between this load and any store.
-  Block* early = _bbs[load_index];
+  Block* early = get_block_for_node(load);

   // If we are subsuming loads, compute an "early" block that only considers
   // memory or address inputs. This block may be different than the
   // schedule_early block in that it could be at an even shallower depth in the
   // dominator tree, and allow for a broader discovery of anti-dependences.
   if (C->subsume_loads()) {
-    early = memory_early_block(load, early, _bbs);
+    early = memory_early_block(load, early, this);
   }

   ResourceArea *area = Thread::current()->resource_area();
   Node_List worklist_mem(area); // prior memory state to store
   Node_List worklist_store(area); // possible-def to explore
@@ -617,11 +618,11 @@

     // Identify a block that the current load must be above,
     // or else observe that 'store' is all the way up in the
     // earliest legal block for 'load'. In the latter case,
     // immediately insert an anti-dependence edge.
-    Block* store_block = _bbs[store->_idx];
+    Block* store_block = get_block_for_node(store);
     assert(store_block != NULL, "unused killing projections skipped above");

     if (store->is_Phi()) {
       // 'load' uses memory which is one (or more) of the Phi's inputs.
       // It must be scheduled not before the Phi, but rather before
@@ -635,11 +636,11 @@
       // PhiNode may be at start of block 'early' with backedge to 'early'
       DEBUG_ONLY(bool found_match = false);
       for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) {
         if (store->in(j) == mem) { // Found matching input?
           DEBUG_ONLY(found_match = true);
-          Block* pred_block = _bbs[store_block->pred(j)->_idx];
+          Block* pred_block = get_block_for_node(store_block->pred(j));
           if (pred_block != early) {
             // If any predecessor of the Phi matches the load's "early block",
             // we do not need a precedence edge between the Phi and 'load'
             // since the load will be forced into a block preceding the Phi.
             pred_block->set_raise_LCA_mark(load_index);
@@ -709,20 +710,20 @@
   //
   // The raised LCA will be a lower bound for placing the load,
   // preventing the load from sinking past any block containing
   // a store that may invalidate the memory state required by 'load'.
   if (must_raise_LCA)
-    LCA = raise_LCA_above_marks(LCA, load->_idx, early, _bbs);
+    LCA = raise_LCA_above_marks(LCA, load->_idx, early, this);
   if (LCA == early) return LCA;

   // Insert anti-dependence edges from 'load' to each store
   // in the non-early LCA block.
   // Mine the non_early_stores list for such stores.
   if (LCA->raise_LCA_mark() == load_index) {
     while (non_early_stores.size() > 0) {
       Node* store = non_early_stores.pop();
-      Block* store_block = _bbs[store->_idx];
+      Block* store_block = get_block_for_node(store);
       if (store_block == LCA) {
         // add anti_dependence from store to load in its own block
         assert(store != load->in(0), "dependence cycle found");
         if (verify) {
           assert(store->find_edge(load) != -1, "missing precedence edge");
@@ -752,24 +753,24 @@
  private:
   Node_Backward_Iterator();

 public:
   // Constructor for the iterator
-  Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs);
+  Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg);

   // Postincrement operator to iterate over the nodes
   Node *next();

 private:
   VectorSet &_visited;
   Node_List &_stack;
-  Block_Array &_bbs;
+  PhaseCFG &_cfg;
 };

 // Constructor for the Node_Backward_Iterator
-Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs )
-  : _visited(visited), _stack(stack), _bbs(bbs) {
+Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg)
+  : _visited(visited), _stack(stack), _cfg(cfg) {
   // The stack should contain exactly the root
   stack.clear();
   stack.push(root);

   // Clear the visited bits
@@ -795,12 +796,12 @@
   while( 1 ) {

     _visited.set(self->_idx);

     // Now schedule all uses as late as possible.
-    uint src = self->is_Proj() ? self->in(0)->_idx : self->_idx;
-    uint src_rpo = _bbs[src]->_rpo;
+    const Node* src = self->is_Proj() ? self->in(0) : self;
+    uint src_rpo = _cfg.get_block_for_node(src)->_rpo;

     // Schedule all nodes in a post-order visit
     Node *unvisited = NULL; // Unvisited anti-dependent Node, if any

     // Scan for unvisited nodes
@@ -812,11 +813,11 @@
       if ( _visited.test(n->_idx) )
         continue;

       // do not traverse backward control edges
       Node *use = n->is_Proj() ? n->in(0) : n;
-      uint use_rpo = _bbs[use->_idx]->_rpo;
+      uint use_rpo = _cfg.get_block_for_node(use)->_rpo;

       if ( use_rpo < src_rpo )
         continue;

       // Phi nodes always precede uses in a basic block
@@ -850,11 +851,11 @@
 #ifndef PRODUCT
   if (trace_opto_pipelining())
     tty->print("\n#---- ComputeLatenciesBackwards ----\n");
 #endif

-  Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
+  Node_Backward_Iterator iter((Node *)_root, visited, stack, *this);
   Node *n;

   // Walk over all the nodes from last to first
   while (n = iter.next()) {
     // Set the latency for the definitions of this instruction
@@ -881,11 +882,11 @@
   if (n->is_Root())
     return;

   uint nlen = n->len();
   uint use_latency = _node_latency->at_grow(n->_idx);
-  uint use_pre_order = _bbs[n->_idx]->_pre_order;
+  uint use_pre_order = get_block_for_node(n)->_pre_order;

   for ( uint j=0; j<nlen; j++ ) {
     Node *def = n->in(j);

     if (!def || def == n)
@@ -901,11 +902,11 @@
       def->dump();
     }
 #endif

     // If the defining block is not known, assume it is ok
-    Block *def_block = _bbs[def->_idx];
+    Block *def_block = get_block_for_node(def);
     uint def_pre_order = def_block ? def_block->_pre_order : 0;

     if ( (use_pre_order < def_pre_order) ||
          (use_pre_order == def_pre_order && n->is_Phi()) )
       continue;
@@ -929,14 +930,15 @@

 //------------------------------latency_from_use-------------------------------
 // Compute the latency of a specific use
 int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
   // If self-reference, return no latency
-  if (use == n || use->is_Root())
+  if (use == n || use->is_Root()) {
     return 0;
+  }

-  uint def_pre_order = _bbs[def->_idx]->_pre_order;
+  uint def_pre_order = get_block_for_node(def)->_pre_order;
   uint latency = 0;

   // If the use is not a projection, then it is simple...
   if (!use->is_Proj()) {
 #ifndef PRODUCT
@@ -944,11 +946,11 @@
       tty->print("# out(): ");
       use->dump();
     }
 #endif

-    uint use_pre_order = _bbs[use->_idx]->_pre_order;
+    uint use_pre_order = get_block_for_node(use)->_pre_order;

     if (use_pre_order < def_pre_order)
       return 0;

     if (use_pre_order == def_pre_order && use->is_Phi())
@@ -1016,11 +1018,11 @@
   double least_freq = least->_freq;
   uint target = _node_latency->at_grow(self->_idx);
   uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx);
   uint end_latency = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
   bool in_latency = (target <= start_latency);
-  const Block* root_block = _bbs[_root->_idx];
+  const Block* root_block = get_block_for_node(_root);

   // Turn off latency scheduling if scheduling is just plain off
   if (!C->do_scheduling())
     in_latency = true;

@@ -1124,16 +1126,16 @@
 #ifndef PRODUCT
   if (trace_opto_pipelining())
     tty->print("\n#---- schedule_late ----\n");
 #endif

-  Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
+  Node_Backward_Iterator iter((Node *)_root, visited, stack, *this);
   Node *self;

   // Walk over all the nodes from last to first
   while (self = iter.next()) {
-    Block* early = _bbs[self->_idx]; // Earliest legal placement
+    Block* early = get_block_for_node(self); // Earliest legal placement

     if (self->is_top()) {
       // Top node goes in bb #2 with other constants.
       // It must be special-cased, because it has no out edges.
       early->add_inst(self);
@@ -1177,19 +1179,19 @@
     Block *LCA = NULL;
     {
       for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
         // For all uses, find LCA
         Node* use = self->fast_out(i);
-        LCA = raise_LCA_above_use(LCA, use, self, _bbs);
+        LCA = raise_LCA_above_use(LCA, use, self, this);
       }
     } // (Hide defs of imax, i from rest of block.)

     // Place temps in the block of their use. This isn't a
     // requirement for correctness but it reduces useless
     // interference between temps and other nodes.
     if (mach != NULL && mach->is_MachTemp()) {
-      _bbs.map(self->_idx, LCA);
+      map_node_to_block(self, LCA);
       LCA->add_inst(self);
       continue;
     }

     // Check if 'self' could be anti-dependent on memory
@@ -1260,14 +1262,14 @@
   if (trace_opto_pipelining()) {
     tty->print("\n---- Start GlobalCodeMotion ----\n");
   }
 #endif

-  // Initialize the bbs.map for things on the proj_list
-  uint i;
-  for( i=0; i < proj_list.size(); i++ )
-    _bbs.map(proj_list[i]->_idx, NULL);
+  // Initialize the node to block mapping for things on the proj_list
+  for (uint i = 0; i < proj_list.size(); i++) {
+    unmap_node_from_block(proj_list[i]);
+  }

   // Set the basic block for Nodes pinned into blocks
   Arena *a = Thread::current()->resource_area();
   VectorSet visited(a);
   schedule_pinned_nodes( visited );
@@ -1331,11 +1333,11 @@
     // Feel free to revert to a forward loop for clarity.
     // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) {
     for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) {
       Node *proj = matcher._null_check_tests[i ];
       Node *val = matcher._null_check_tests[i+1];
-      _bbs[proj->_idx]->implicit_null_check(this, proj, val, allowed_reasons);
+      get_block_for_node(proj)->implicit_null_check(this, proj, val, allowed_reasons);
       // The implicit_null_check will only perform the transformation
       // if the null branch is truly uncommon, *and* it leads to an
       // uncommon trap. Combined with the too_many_traps guards
       // above, this prevents SEGV storms reported in 6366351,
       // by recompiling offending methods without this optimization.
@@ -1351,23 +1353,24 @@
   // Schedule locally. Right now a simple topological sort.
   // Later, do a real latency aware scheduler.
   uint max_idx = C->unique();
   GrowableArray<int> ready_cnt(max_idx, max_idx, -1);
   visited.Clear();
-  for (i = 0; i < _num_blocks; i++) {
+  for (uint i = 0; i < _num_blocks; i++) {
     if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) {
       if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
         C->record_method_not_compilable("local schedule failed");
       }
       return;
     }
   }

   // If we inserted any instructions between a Call and his CatchNode,
   // clone the instructions on all paths below the Catch.
-  for( i=0; i < _num_blocks; i++ )
-    _blocks[i]->call_catch_cleanup(_bbs, C);
+  for (uint i = 0; i < _num_blocks; i++) {
+    _blocks[i]->call_catch_cleanup(this, C);
+  }

 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
     tty->print("\n---- After GlobalCodeMotion ----\n");
     for (uint i = 0; i < _num_blocks; i++) {
@@ -1390,20 +1393,20 @@
   // there once.
   if (C->do_freq_based_layout()) {
     Block_List worklist;
     Block* root_blk = _blocks[0];
     for (uint i = 1; i < root_blk->num_preds(); i++) {
-      Block *pb = _bbs[root_blk->pred(i)->_idx];
+      Block *pb = get_block_for_node(root_blk->pred(i));
       if (pb->has_uncommon_code()) {
         worklist.push(pb);
       }
     }
     while (worklist.size() > 0) {
       Block* uct = worklist.pop();
       if (uct == _broot) continue;
       for (uint i = 1; i < uct->num_preds(); i++) {
-        Block *pb = _bbs[uct->pred(i)->_idx];
+        Block *pb = get_block_for_node(uct->pred(i));
         if (pb->_num_succs == 1) {
           worklist.push(pb);
         } else if (pb->num_fall_throughs() == 2) {
           pb->update_uncommon_branch(uct);
         }
@@ -1428,20 +1431,20 @@
   // force paths ending at uncommon traps to be infrequent
   if (!C->do_freq_based_layout()) {
     Block_List worklist;
     Block* root_blk = _blocks[0];
     for (uint i = 1; i < root_blk->num_preds(); i++) {
-      Block *pb = _bbs[root_blk->pred(i)->_idx];
+      Block *pb = get_block_for_node(root_blk->pred(i));
       if (pb->has_uncommon_code()) {
         worklist.push(pb);
       }
     }
     while (worklist.size() > 0) {
       Block* uct = worklist.pop();
       uct->_freq = PROB_MIN;
       for (uint i = 1; i < uct->num_preds(); i++) {
-        Block *pb = _bbs[uct->pred(i)->_idx];
+        Block *pb = get_block_for_node(uct->pred(i));
         if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) {
           worklist.push(pb);
         }
       }
     }
@@ -1497,11 +1500,11 @@

     if (b->head()->is_Loop()) {
       Block* loop_head = b;
       assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
       Node* tail_n = loop_head->pred(LoopNode::LoopBackControl);
-      Block* tail = _bbs[tail_n->_idx];
+      Block* tail = get_block_for_node(tail_n);

       // Defensively filter out Loop nodes for non-single-entry loops.
       // For all reasonable loops, the head occurs before the tail in RPO.
       if (i <= tail->_rpo) {

@@ -1512,17 +1515,17 @@
         CFGLoop* nloop = new CFGLoop(idct++);
         assert(loop_head->_loop == NULL, "just checking");
         loop_head->_loop = nloop;
         // Add to nloop so push_pred() will skip over inner loops
         nloop->add_member(loop_head);
-        nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, _bbs);
+        nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, this);

         while (worklist.size() > 0) {
           Block* member = worklist.pop();
           if (member != loop_head) {
             for (uint j = 1; j < member->num_preds(); j++) {
-              nloop->push_pred(member, j, worklist, _bbs);
+              nloop->push_pred(member, j, worklist, this);
             }
           }
         }
       }
     }
@@ -1555,13 +1558,13 @@

   return root_loop;
 }

 //------------------------------push_pred--------------------------------------
-void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk) {
+void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, PhaseCFG* cfg) {
   Node* pred_n = blk->pred(i);
-  Block* pred = node_to_blk[pred_n->_idx];
+  Block* pred = cfg->get_block_for_node(pred_n);
   CFGLoop *pred_loop = pred->_loop;
   if (pred_loop == NULL) {
     // Filter out blocks for non-single-entry loops.
     // For all reasonable loops, the head occurs before the tail in RPO.
     if (pred->_rpo > head()->_rpo) {
@@ -1578,11 +1581,11 @@
       add_nested_loop(pred_loop);
       // Continue with loop entry predecessor.
       Block* pred_head = pred_loop->head();
       assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
       assert(pred_head != head(), "loop head in only one loop");
-      push_pred(pred_head, LoopNode::EntryControl, worklist, node_to_blk);
+      push_pred(pred_head, LoopNode::EntryControl, worklist, cfg);
     } else {
       assert(pred_loop->_parent == this && _parent == NULL, "just checking");
     }
   }
 }