Mercurial > hg > graal-compiler
comparison src/share/vm/opto/lcm.cpp @ 12171:4b078f877b56
8023988: Move local scheduling of nodes to the CFG creation and code motion phase (PhaseCFG)
Summary: Moved local scheduling code from class Block to class PhaseCFG
Reviewed-by: kvn, roland
author | adlertz |
---|---|
date | Sun, 01 Sep 2013 19:21:05 +0200 |
parents | 650868c062a9 |
children | a9a968364704 |
comparison
equal
deleted
inserted
replaced
12169:29aa8936f03c | 12171:4b078f877b56 |
---|---|
56 // with suitable memory ops nearby. Use the memory op to do the NULL check. | 56 // with suitable memory ops nearby. Use the memory op to do the NULL check. |
57 // I can generate a memory op if there is not one nearby. | 57 // I can generate a memory op if there is not one nearby. |
58 // The proj is the control projection for the not-null case. | 58 // The proj is the control projection for the not-null case. |
59 // The val is the pointer being checked for nullness or | 59 // The val is the pointer being checked for nullness or |
60 // decodeHeapOop_not_null node if it did not fold into address. | 60 // decodeHeapOop_not_null node if it did not fold into address. |
61 void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons) { | 61 void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allowed_reasons) { |
62 // Assume if null check need for 0 offset then always needed | 62 // Assume if null check need for 0 offset then always needed |
63 // Intel solaris doesn't support any null checks yet and no | 63 // Intel solaris doesn't support any null checks yet and no |
64 // mechanism exists (yet) to set the switches at an os_cpu level | 64 // mechanism exists (yet) to set the switches at an os_cpu level |
65 if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return; | 65 if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return; |
66 | 66 |
67 // Make sure the ptr-is-null path appears to be uncommon! | 67 // Make sure the ptr-is-null path appears to be uncommon! |
68 float f = end()->as_MachIf()->_prob; | 68 float f = block->end()->as_MachIf()->_prob; |
69 if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f; | 69 if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f; |
70 if( f > PROB_UNLIKELY_MAG(4) ) return; | 70 if( f > PROB_UNLIKELY_MAG(4) ) return; |
71 | 71 |
72 uint bidx = 0; // Capture index of value into memop | 72 uint bidx = 0; // Capture index of value into memop |
73 bool was_store; // Memory op is a store op | 73 bool was_store; // Memory op is a store op |
74 | 74 |
75 // Get the successor block for if the test ptr is non-null | 75 // Get the successor block for if the test ptr is non-null |
76 Block* not_null_block; // this one goes with the proj | 76 Block* not_null_block; // this one goes with the proj |
77 Block* null_block; | 77 Block* null_block; |
78 if (get_node(number_of_nodes()-1) == proj) { | 78 if (block->get_node(block->number_of_nodes()-1) == proj) { |
79 null_block = _succs[0]; | 79 null_block = block->_succs[0]; |
80 not_null_block = _succs[1]; | 80 not_null_block = block->_succs[1]; |
81 } else { | 81 } else { |
82 assert(get_node(number_of_nodes()-2) == proj, "proj is one or the other"); | 82 assert(block->get_node(block->number_of_nodes()-2) == proj, "proj is one or the other"); |
83 not_null_block = _succs[0]; | 83 not_null_block = block->_succs[0]; |
84 null_block = _succs[1]; | 84 null_block = block->_succs[1]; |
85 } | 85 } |
86 while (null_block->is_Empty() == Block::empty_with_goto) { | 86 while (null_block->is_Empty() == Block::empty_with_goto) { |
87 null_block = null_block->_succs[0]; | 87 null_block = null_block->_succs[0]; |
88 } | 88 } |
89 | 89 |
91 // (See Parse::do_if and Parse::do_ifnull for the reason | 91 // (See Parse::do_if and Parse::do_ifnull for the reason |
92 // we need an uncommon trap. Briefly, we need a way to | 92 // we need an uncommon trap. Briefly, we need a way to |
93 // detect failure of this optimization, as in 6366351.) | 93 // detect failure of this optimization, as in 6366351.) |
94 { | 94 { |
95 bool found_trap = false; | 95 bool found_trap = false; |
96 for (uint i1 = 0; i1 < null_block->_nodes.size(); i1++) { | 96 for (uint i1 = 0; i1 < null_block->number_of_nodes(); i1++) { |
97 Node* nn = null_block->get_node(i1); | 97 Node* nn = null_block->get_node(i1); |
98 if (nn->is_MachCall() && | 98 if (nn->is_MachCall() && |
99 nn->as_MachCall()->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) { | 99 nn->as_MachCall()->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) { |
100 const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type(); | 100 const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type(); |
101 if (trtype->isa_int() && trtype->is_int()->is_con()) { | 101 if (trtype->isa_int() && trtype->is_int()->is_con()) { |
235 continue; // Give up if reference is beyond 4K page size | 235 continue; // Give up if reference is beyond 4K page size |
236 } | 236 } |
237 } | 237 } |
238 | 238 |
239 // Check ctrl input to see if the null-check dominates the memory op | 239 // Check ctrl input to see if the null-check dominates the memory op |
240 Block *cb = cfg->get_block_for_node(mach); | 240 Block *cb = get_block_for_node(mach); |
241 cb = cb->_idom; // Always hoist at least 1 block | 241 cb = cb->_idom; // Always hoist at least 1 block |
242 if( !was_store ) { // Stores can be hoisted only one block | 242 if( !was_store ) { // Stores can be hoisted only one block |
243 while( cb->_dom_depth > (_dom_depth + 1)) | 243 while( cb->_dom_depth > (block->_dom_depth + 1)) |
244 cb = cb->_idom; // Hoist loads as far as we want | 244 cb = cb->_idom; // Hoist loads as far as we want |
245 // The non-null-block should dominate the memory op, too. Live | 245 // The non-null-block should dominate the memory op, too. Live |
246 // range spilling will insert a spill in the non-null-block if it | 246 // range spilling will insert a spill in the non-null-block if it |
247 // needs to spill the memory op for an implicit null check. | 247 // needs to spill the memory op for an implicit null check. |
248 if (cb->_dom_depth == (_dom_depth + 1)) { | 248 if (cb->_dom_depth == (block->_dom_depth + 1)) { |
249 if (cb != not_null_block) continue; | 249 if (cb != not_null_block) continue; |
250 cb = cb->_idom; | 250 cb = cb->_idom; |
251 } | 251 } |
252 } | 252 } |
253 if( cb != this ) continue; | 253 if( cb != block ) continue; |
254 | 254 |
255 // Found a memory user; see if it can be hoisted to check-block | 255 // Found a memory user; see if it can be hoisted to check-block |
256 uint vidx = 0; // Capture index of value into memop | 256 uint vidx = 0; // Capture index of value into memop |
257 uint j; | 257 uint j; |
258 for( j = mach->req()-1; j > 0; j-- ) { | 258 for( j = mach->req()-1; j > 0; j-- ) { |
260 vidx = j; | 260 vidx = j; |
261 // Ignore DecodeN val which could be hoisted to where needed. | 261 // Ignore DecodeN val which could be hoisted to where needed. |
262 if( is_decoden ) continue; | 262 if( is_decoden ) continue; |
263 } | 263 } |
264 // Block of memory-op input | 264 // Block of memory-op input |
265 Block *inb = cfg->get_block_for_node(mach->in(j)); | 265 Block *inb = get_block_for_node(mach->in(j)); |
266 Block *b = this; // Start from null check | 266 Block *b = block; // Start from null check |
267 while( b != inb && b->_dom_depth > inb->_dom_depth ) | 267 while( b != inb && b->_dom_depth > inb->_dom_depth ) |
268 b = b->_idom; // search upwards for input | 268 b = b->_idom; // search upwards for input |
269 // See if input dominates null check | 269 // See if input dominates null check |
270 if( b != inb ) | 270 if( b != inb ) |
271 break; | 271 break; |
272 } | 272 } |
273 if( j > 0 ) | 273 if( j > 0 ) |
274 continue; | 274 continue; |
275 Block *mb = cfg->get_block_for_node(mach); | 275 Block *mb = get_block_for_node(mach); |
276 // Hoisting stores requires more checks for the anti-dependence case. | 276 // Hoisting stores requires more checks for the anti-dependence case. |
277 // Give up hoisting if we have to move the store past any load. | 277 // Give up hoisting if we have to move the store past any load. |
278 if( was_store ) { | 278 if( was_store ) { |
279 Block *b = mb; // Start searching here for a local load | 279 Block *b = mb; // Start searching here for a local load |
280 // mach use (faulting) trying to hoist | 280 // mach use (faulting) trying to hoist |
281 // n might be blocker to hoisting | 281 // n might be blocker to hoisting |
282 while( b != this ) { | 282 while( b != block ) { |
283 uint k; | 283 uint k; |
284 for( k = 1; k < b->_nodes.size(); k++ ) { | 284 for( k = 1; k < b->number_of_nodes(); k++ ) { |
285 Node *n = b->get_node(k); | 285 Node *n = b->get_node(k); |
286 if( n->needs_anti_dependence_check() && | 286 if( n->needs_anti_dependence_check() && |
287 n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) ) | 287 n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) ) |
288 break; // Found anti-dependent load | 288 break; // Found anti-dependent load |
289 } | 289 } |
290 if( k < b->_nodes.size() ) | 290 if( k < b->number_of_nodes() ) |
291 break; // Found anti-dependent load | 291 break; // Found anti-dependent load |
292 // Make sure control does not do a merge (would have to check allpaths) | 292 // Make sure control does not do a merge (would have to check allpaths) |
293 if( b->num_preds() != 2 ) break; | 293 if( b->num_preds() != 2 ) break; |
294 b = cfg->get_block_for_node(b->pred(1)); // Move up to predecessor block | 294 b = get_block_for_node(b->pred(1)); // Move up to predecessor block |
295 } | 295 } |
296 if( b != this ) continue; | 296 if( b != block ) continue; |
297 } | 297 } |
298 | 298 |
299 // Make sure this memory op is not already being used for a NullCheck | 299 // Make sure this memory op is not already being used for a NullCheck |
300 Node *e = mb->end(); | 300 Node *e = mb->end(); |
301 if( e->is_MachNullCheck() && e->in(1) == mach ) | 301 if( e->is_MachNullCheck() && e->in(1) == mach ) |
302 continue; // Already being used as a NULL check | 302 continue; // Already being used as a NULL check |
303 | 303 |
304 // Found a candidate! Pick one with least dom depth - the highest | 304 // Found a candidate! Pick one with least dom depth - the highest |
305 // in the dom tree should be closest to the null check. | 305 // in the dom tree should be closest to the null check. |
306 if (best == NULL || cfg->get_block_for_node(mach)->_dom_depth < cfg->get_block_for_node(best)->_dom_depth) { | 306 if (best == NULL || get_block_for_node(mach)->_dom_depth < get_block_for_node(best)->_dom_depth) { |
307 best = mach; | 307 best = mach; |
308 bidx = vidx; | 308 bidx = vidx; |
309 } | 309 } |
310 } | 310 } |
311 // No candidate! | 311 // No candidate! |
317 extern int implicit_null_checks; | 317 extern int implicit_null_checks; |
318 implicit_null_checks++; | 318 implicit_null_checks++; |
319 | 319 |
320 if( is_decoden ) { | 320 if( is_decoden ) { |
321 // Check if we need to hoist decodeHeapOop_not_null first. | 321 // Check if we need to hoist decodeHeapOop_not_null first. |
322 Block *valb = cfg->get_block_for_node(val); | 322 Block *valb = get_block_for_node(val); |
323 if( this != valb && this->_dom_depth < valb->_dom_depth ) { | 323 if( block != valb && block->_dom_depth < valb->_dom_depth ) { |
324 // Hoist it up to the end of the test block. | 324 // Hoist it up to the end of the test block. |
325 valb->find_remove(val); | 325 valb->find_remove(val); |
326 this->add_inst(val); | 326 block->add_inst(val); |
327 cfg->map_node_to_block(val, this); | 327 map_node_to_block(val, block); |
328 // DecodeN on x86 may kill flags. Check for flag-killing projections | 328 // DecodeN on x86 may kill flags. Check for flag-killing projections |
329 // that also need to be hoisted. | 329 // that also need to be hoisted. |
330 for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) { | 330 for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) { |
331 Node* n = val->fast_out(j); | 331 Node* n = val->fast_out(j); |
332 if( n->is_MachProj() ) { | 332 if( n->is_MachProj() ) { |
333 cfg->get_block_for_node(n)->find_remove(n); | 333 get_block_for_node(n)->find_remove(n); |
334 this->add_inst(n); | 334 block->add_inst(n); |
335 cfg->map_node_to_block(n, this); | 335 map_node_to_block(n, block); |
336 } | 336 } |
337 } | 337 } |
338 } | 338 } |
339 } | 339 } |
340 // Hoist the memory candidate up to the end of the test block. | 340 // Hoist the memory candidate up to the end of the test block. |
341 Block *old_block = cfg->get_block_for_node(best); | 341 Block *old_block = get_block_for_node(best); |
342 old_block->find_remove(best); | 342 old_block->find_remove(best); |
343 add_inst(best); | 343 block->add_inst(best); |
344 cfg->map_node_to_block(best, this); | 344 map_node_to_block(best, block); |
345 | 345 |
346 // Move the control dependence | 346 // Move the control dependence |
347 if (best->in(0) && best->in(0) == old_block->head()) | 347 if (best->in(0) && best->in(0) == old_block->head()) |
348 best->set_req(0, head()); | 348 best->set_req(0, block->head()); |
349 | 349 |
350 // Check for flag-killing projections that also need to be hoisted | 350 // Check for flag-killing projections that also need to be hoisted |
351 // Should be DU safe because no edge updates. | 351 // Should be DU safe because no edge updates. |
352 for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) { | 352 for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) { |
353 Node* n = best->fast_out(j); | 353 Node* n = best->fast_out(j); |
354 if( n->is_MachProj() ) { | 354 if( n->is_MachProj() ) { |
355 cfg->get_block_for_node(n)->find_remove(n); | 355 get_block_for_node(n)->find_remove(n); |
356 add_inst(n); | 356 block->add_inst(n); |
357 cfg->map_node_to_block(n, this); | 357 map_node_to_block(n, block); |
358 } | 358 } |
359 } | 359 } |
360 | 360 |
361 Compile *C = cfg->C; | |
362 // proj==Op_True --> ne test; proj==Op_False --> eq test. | 361 // proj==Op_True --> ne test; proj==Op_False --> eq test. |
363 // One of two graph shapes got matched: | 362 // One of two graph shapes got matched: |
364 // (IfTrue (If (Bool NE (CmpP ptr NULL)))) | 363 // (IfTrue (If (Bool NE (CmpP ptr NULL)))) |
365 // (IfFalse (If (Bool EQ (CmpP ptr NULL)))) | 364 // (IfFalse (If (Bool EQ (CmpP ptr NULL)))) |
366 // NULL checks are always branch-if-eq. If we see a IfTrue projection | 365 // NULL checks are always branch-if-eq. If we see a IfTrue projection |
367 // then we are replacing a 'ne' test with a 'eq' NULL check test. | 366 // then we are replacing a 'ne' test with a 'eq' NULL check test. |
368 // We need to flip the projections to keep the same semantics. | 367 // We need to flip the projections to keep the same semantics. |
369 if( proj->Opcode() == Op_IfTrue ) { | 368 if( proj->Opcode() == Op_IfTrue ) { |
370 // Swap order of projections in basic block to swap branch targets | 369 // Swap order of projections in basic block to swap branch targets |
371 Node *tmp1 = get_node(end_idx()+1); | 370 Node *tmp1 = block->get_node(block->end_idx()+1); |
372 Node *tmp2 = get_node(end_idx()+2); | 371 Node *tmp2 = block->get_node(block->end_idx()+2); |
373 _nodes.map(end_idx()+1, tmp2); | 372 block->map_node(tmp2, block->end_idx()+1); |
374 _nodes.map(end_idx()+2, tmp1); | 373 block->map_node(tmp1, block->end_idx()+2); |
375 Node *tmp = new (C) Node(C->top()); // Use not NULL input | 374 Node *tmp = new (C) Node(C->top()); // Use not NULL input |
376 tmp1->replace_by(tmp); | 375 tmp1->replace_by(tmp); |
377 tmp2->replace_by(tmp1); | 376 tmp2->replace_by(tmp1); |
378 tmp->replace_by(tmp2); | 377 tmp->replace_by(tmp2); |
379 tmp->destruct(); | 378 tmp->destruct(); |
382 // Remove the existing null check; use a new implicit null check instead. | 381 // Remove the existing null check; use a new implicit null check instead. |
383 // Since schedule-local needs precise def-use info, we need to correct | 382 // Since schedule-local needs precise def-use info, we need to correct |
384 // it as well. | 383 // it as well. |
385 Node *old_tst = proj->in(0); | 384 Node *old_tst = proj->in(0); |
386 MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx); | 385 MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx); |
387 _nodes.map(end_idx(),nul_chk); | 386 block->map_node(nul_chk, block->end_idx()); |
388 cfg->map_node_to_block(nul_chk, this); | 387 map_node_to_block(nul_chk, block); |
389 // Redirect users of old_test to nul_chk | 388 // Redirect users of old_test to nul_chk |
390 for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2) | 389 for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2) |
391 old_tst->last_out(i2)->set_req(0, nul_chk); | 390 old_tst->last_out(i2)->set_req(0, nul_chk); |
392 // Clean-up any dead code | 391 // Clean-up any dead code |
393 for (uint i3 = 0; i3 < old_tst->req(); i3++) | 392 for (uint i3 = 0; i3 < old_tst->req(); i3++) |
394 old_tst->set_req(i3, NULL); | 393 old_tst->set_req(i3, NULL); |
395 | 394 |
396 cfg->latency_from_uses(nul_chk); | 395 latency_from_uses(nul_chk); |
397 cfg->latency_from_uses(best); | 396 latency_from_uses(best); |
398 } | 397 } |
399 | 398 |
400 | 399 |
401 //------------------------------select----------------------------------------- | 400 //------------------------------select----------------------------------------- |
402 // Select a nice fellow from the worklist to schedule next. If there is only | 401 // Select a nice fellow from the worklist to schedule next. If there is only |
406 // These are chosen immediately. Some instructions are required to immediately | 405 // These are chosen immediately. Some instructions are required to immediately |
407 // precede the last instruction in the block, and these are taken last. Of the | 406 // precede the last instruction in the block, and these are taken last. Of the |
408 // remaining cases (most), choose the instruction with the greatest latency | 407 // remaining cases (most), choose the instruction with the greatest latency |
409 // (that is, the most number of pseudo-cycles required to the end of the | 408 // (that is, the most number of pseudo-cycles required to the end of the |
410 // routine). If there is a tie, choose the instruction with the most inputs. | 409 // routine). If there is a tie, choose the instruction with the most inputs. |
411 Node *Block::select(PhaseCFG *cfg, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) { | 410 Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) { |
412 | 411 |
413 // If only a single entry on the stack, use it | 412 // If only a single entry on the stack, use it |
414 uint cnt = worklist.size(); | 413 uint cnt = worklist.size(); |
415 if (cnt == 1) { | 414 if (cnt == 1) { |
416 Node *n = worklist[0]; | 415 Node *n = worklist[0]; |
440 worklist.map(i,worklist.pop()); | 439 worklist.map(i,worklist.pop()); |
441 return n; | 440 return n; |
442 } | 441 } |
443 | 442 |
444 // Final call in a block must be adjacent to 'catch' | 443 // Final call in a block must be adjacent to 'catch' |
445 Node *e = end(); | 444 Node *e = block->end(); |
446 if( e->is_Catch() && e->in(0)->in(0) == n ) | 445 if( e->is_Catch() && e->in(0)->in(0) == n ) |
447 continue; | 446 continue; |
448 | 447 |
449 // Memory op for an implicit null check has to be at the end of the block | 448 // Memory op for an implicit null check has to be at the end of the block |
450 if( e->is_MachNullCheck() && e->in(1) == n ) | 449 if( e->is_MachNullCheck() && e->in(1) == n ) |
466 | 465 |
467 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | 466 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { |
468 Node* use = n->fast_out(j); | 467 Node* use = n->fast_out(j); |
469 | 468 |
470 // The use is a conditional branch, make them adjacent | 469 // The use is a conditional branch, make them adjacent |
471 if (use->is_MachIf() && cfg->get_block_for_node(use) == this) { | 470 if (use->is_MachIf() && get_block_for_node(use) == block) { |
472 found_machif = true; | 471 found_machif = true; |
473 break; | 472 break; |
474 } | 473 } |
475 | 474 |
476 // More than this instruction pending for successor to be ready, | 475 // More than this instruction pending for successor to be ready, |
499 // MachTemps should be scheduled last so they are near their uses | 498 // MachTemps should be scheduled last so they are near their uses |
500 if (n->is_MachTemp()) { | 499 if (n->is_MachTemp()) { |
501 n_choice = 1; | 500 n_choice = 1; |
502 } | 501 } |
503 | 502 |
504 uint n_latency = cfg->get_latency_for_node(n); | 503 uint n_latency = get_latency_for_node(n); |
505 uint n_score = n->req(); // Many inputs get high score to break ties | 504 uint n_score = n->req(); // Many inputs get high score to break ties |
506 | 505 |
507 // Keep best latency found | 506 // Keep best latency found |
508 cand_cnt++; | 507 cand_cnt++; |
509 if (choice < n_choice || | 508 if (choice < n_choice || |
527 return n; | 526 return n; |
528 } | 527 } |
529 | 528 |
530 | 529 |
531 //------------------------------set_next_call---------------------------------- | 530 //------------------------------set_next_call---------------------------------- |
532 void Block::set_next_call( Node *n, VectorSet &next_call, PhaseCFG* cfg) { | 531 void PhaseCFG::set_next_call(Block* block, Node* n, VectorSet& next_call) { |
533 if( next_call.test_set(n->_idx) ) return; | 532 if( next_call.test_set(n->_idx) ) return; |
534 for( uint i=0; i<n->len(); i++ ) { | 533 for( uint i=0; i<n->len(); i++ ) { |
535 Node *m = n->in(i); | 534 Node *m = n->in(i); |
536 if( !m ) continue; // must see all nodes in block that precede call | 535 if( !m ) continue; // must see all nodes in block that precede call |
537 if (cfg->get_block_for_node(m) == this) { | 536 if (get_block_for_node(m) == block) { |
538 set_next_call(m, next_call, cfg); | 537 set_next_call(block, m, next_call); |
539 } | 538 } |
540 } | 539 } |
541 } | 540 } |
542 | 541 |
543 //------------------------------needed_for_next_call--------------------------- | 542 //------------------------------needed_for_next_call--------------------------- |
544 // Set the flag 'next_call' for each Node that is needed for the next call to | 543 // Set the flag 'next_call' for each Node that is needed for the next call to |
545 // be scheduled. This flag lets me bias scheduling so Nodes needed for the | 544 // be scheduled. This flag lets me bias scheduling so Nodes needed for the |
546 // next subroutine call get priority - basically it moves things NOT needed | 545 // next subroutine call get priority - basically it moves things NOT needed |
547 // for the next call till after the call. This prevents me from trying to | 546 // for the next call till after the call. This prevents me from trying to |
548 // carry lots of stuff live across a call. | 547 // carry lots of stuff live across a call. |
549 void Block::needed_for_next_call(Node *this_call, VectorSet &next_call, PhaseCFG* cfg) { | 548 void PhaseCFG::needed_for_next_call(Block* block, Node* this_call, VectorSet& next_call) { |
550 // Find the next control-defining Node in this block | 549 // Find the next control-defining Node in this block |
551 Node* call = NULL; | 550 Node* call = NULL; |
552 for (DUIterator_Fast imax, i = this_call->fast_outs(imax); i < imax; i++) { | 551 for (DUIterator_Fast imax, i = this_call->fast_outs(imax); i < imax; i++) { |
553 Node* m = this_call->fast_out(i); | 552 Node* m = this_call->fast_out(i); |
554 if(cfg->get_block_for_node(m) == this && // Local-block user | 553 if(get_block_for_node(m) == block && // Local-block user |
555 m != this_call && // Not self-start node | 554 m != this_call && // Not self-start node |
556 m->is_MachCall() ) | 555 m->is_MachCall() ) |
557 call = m; | 556 call = m; |
558 break; | 557 break; |
559 } | 558 } |
560 if (call == NULL) return; // No next call (e.g., block end is near) | 559 if (call == NULL) return; // No next call (e.g., block end is near) |
561 // Set next-call for all inputs to this call | 560 // Set next-call for all inputs to this call |
562 set_next_call(call, next_call, cfg); | 561 set_next_call(block, call, next_call); |
563 } | 562 } |
564 | 563 |
565 //------------------------------add_call_kills------------------------------------- | 564 //------------------------------add_call_kills------------------------------------- |
566 void Block::add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe) { | 565 // helper function that adds caller save registers to MachProjNode |
566 static void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe) { | |
567 // Fill in the kill mask for the call | 567 // Fill in the kill mask for the call |
568 for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) { | 568 for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) { |
569 if( !regs.Member(r) ) { // Not already defined by the call | 569 if( !regs.Member(r) ) { // Not already defined by the call |
570 // Save-on-call register? | 570 // Save-on-call register? |
571 if ((save_policy[r] == 'C') || | 571 if ((save_policy[r] == 'C') || |
577 } | 577 } |
578 } | 578 } |
579 | 579 |
580 | 580 |
581 //------------------------------sched_call------------------------------------- | 581 //------------------------------sched_call------------------------------------- |
582 uint Block::sched_call( Matcher &matcher, PhaseCFG* cfg, uint node_cnt, Node_List &worklist, GrowableArray<int> &ready_cnt, MachCallNode *mcall, VectorSet &next_call ) { | 582 uint PhaseCFG::sched_call(Block* block, uint node_cnt, Node_List& worklist, GrowableArray<int>& ready_cnt, MachCallNode* mcall, VectorSet& next_call) { |
583 RegMask regs; | 583 RegMask regs; |
584 | 584 |
585 // Schedule all the users of the call right now. All the users are | 585 // Schedule all the users of the call right now. All the users are |
586 // projection Nodes, so they must be scheduled next to the call. | 586 // projection Nodes, so they must be scheduled next to the call. |
587 // Collect all the defined registers. | 587 // Collect all the defined registers. |
590 assert( n->is_MachProj(), "" ); | 590 assert( n->is_MachProj(), "" ); |
591 int n_cnt = ready_cnt.at(n->_idx)-1; | 591 int n_cnt = ready_cnt.at(n->_idx)-1; |
592 ready_cnt.at_put(n->_idx, n_cnt); | 592 ready_cnt.at_put(n->_idx, n_cnt); |
593 assert( n_cnt == 0, "" ); | 593 assert( n_cnt == 0, "" ); |
594 // Schedule next to call | 594 // Schedule next to call |
595 _nodes.map(node_cnt++, n); | 595 block->map_node(n, node_cnt++); |
596 // Collect defined registers | 596 // Collect defined registers |
597 regs.OR(n->out_RegMask()); | 597 regs.OR(n->out_RegMask()); |
598 // Check for scheduling the next control-definer | 598 // Check for scheduling the next control-definer |
599 if( n->bottom_type() == Type::CONTROL ) | 599 if( n->bottom_type() == Type::CONTROL ) |
600 // Warm up next pile of heuristic bits | 600 // Warm up next pile of heuristic bits |
601 needed_for_next_call(n, next_call, cfg); | 601 needed_for_next_call(block, n, next_call); |
602 | 602 |
603 // Children of projections are now all ready | 603 // Children of projections are now all ready |
604 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | 604 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { |
605 Node* m = n->fast_out(j); // Get user | 605 Node* m = n->fast_out(j); // Get user |
606 if(cfg->get_block_for_node(m) != this) { | 606 if(get_block_for_node(m) != block) { |
607 continue; | 607 continue; |
608 } | 608 } |
609 if( m->is_Phi() ) continue; | 609 if( m->is_Phi() ) continue; |
610 int m_cnt = ready_cnt.at(m->_idx)-1; | 610 int m_cnt = ready_cnt.at(m->_idx)-1; |
611 ready_cnt.at_put(m->_idx, m_cnt); | 611 ready_cnt.at_put(m->_idx, m_cnt); |
615 | 615 |
616 } | 616 } |
617 | 617 |
618 // Act as if the call defines the Frame Pointer. | 618 // Act as if the call defines the Frame Pointer. |
619 // Certainly the FP is alive and well after the call. | 619 // Certainly the FP is alive and well after the call. |
620 regs.Insert(matcher.c_frame_pointer()); | 620 regs.Insert(_matcher.c_frame_pointer()); |
621 | 621 |
622 // Set all registers killed and not already defined by the call. | 622 // Set all registers killed and not already defined by the call. |
623 uint r_cnt = mcall->tf()->range()->cnt(); | 623 uint r_cnt = mcall->tf()->range()->cnt(); |
624 int op = mcall->ideal_Opcode(); | 624 int op = mcall->ideal_Opcode(); |
625 MachProjNode *proj = new (matcher.C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj ); | 625 MachProjNode *proj = new (C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj ); |
626 cfg->map_node_to_block(proj, this); | 626 map_node_to_block(proj, block); |
627 insert_node(proj, node_cnt++); | 627 block->insert_node(proj, node_cnt++); |
628 | 628 |
629 // Select the right register save policy. | 629 // Select the right register save policy. |
630 const char * save_policy; | 630 const char * save_policy; |
631 switch (op) { | 631 switch (op) { |
632 case Op_CallRuntime: | 632 case Op_CallRuntime: |
633 case Op_CallLeaf: | 633 case Op_CallLeaf: |
634 case Op_CallLeafNoFP: | 634 case Op_CallLeafNoFP: |
635 // Calling C code so use C calling convention | 635 // Calling C code so use C calling convention |
636 save_policy = matcher._c_reg_save_policy; | 636 save_policy = _matcher._c_reg_save_policy; |
637 break; | 637 break; |
638 | 638 |
639 case Op_CallStaticJava: | 639 case Op_CallStaticJava: |
640 case Op_CallDynamicJava: | 640 case Op_CallDynamicJava: |
641 // Calling Java code so use Java calling convention | 641 // Calling Java code so use Java calling convention |
642 save_policy = matcher._register_save_policy; | 642 save_policy = _matcher._register_save_policy; |
643 break; | 643 break; |
644 | 644 |
645 default: | 645 default: |
646 ShouldNotReachHere(); | 646 ShouldNotReachHere(); |
647 } | 647 } |
672 } | 672 } |
673 | 673 |
674 | 674 |
675 //------------------------------schedule_local--------------------------------- | 675 //------------------------------schedule_local--------------------------------- |
676 // Topological sort within a block. Someday become a real scheduler. | 676 // Topological sort within a block. Someday become a real scheduler. |
677 bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, GrowableArray<int> &ready_cnt, VectorSet &next_call) { | 677 bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call) { |
678 // Already "sorted" are the block start Node (as the first entry), and | 678 // Already "sorted" are the block start Node (as the first entry), and |
679 // the block-ending Node and any trailing control projections. We leave | 679 // the block-ending Node and any trailing control projections. We leave |
680 // these alone. PhiNodes and ParmNodes are made to follow the block start | 680 // these alone. PhiNodes and ParmNodes are made to follow the block start |
681 // Node. Everything else gets topo-sorted. | 681 // Node. Everything else gets topo-sorted. |
682 | 682 |
683 #ifndef PRODUCT | 683 #ifndef PRODUCT |
684 if (cfg->trace_opto_pipelining()) { | 684 if (trace_opto_pipelining()) { |
685 tty->print_cr("# --- schedule_local B%d, before: ---", _pre_order); | 685 tty->print_cr("# --- schedule_local B%d, before: ---", block->_pre_order); |
686 for (uint i = 0;i < _nodes.size();i++) { | 686 for (uint i = 0;i < block->number_of_nodes(); i++) { |
687 tty->print("# "); | 687 tty->print("# "); |
688 get_node(i)->fast_dump(); | 688 block->get_node(i)->fast_dump(); |
689 } | 689 } |
690 tty->print_cr("#"); | 690 tty->print_cr("#"); |
691 } | 691 } |
692 #endif | 692 #endif |
693 | 693 |
694 // RootNode is already sorted | 694 // RootNode is already sorted |
695 if( _nodes.size() == 1 ) return true; | 695 if (block->number_of_nodes() == 1) { |
696 return true; | |
697 } | |
696 | 698 |
697 // Move PhiNodes and ParmNodes from 1 to cnt up to the start | 699 // Move PhiNodes and ParmNodes from 1 to cnt up to the start |
698 uint node_cnt = end_idx(); | 700 uint node_cnt = block->end_idx(); |
699 uint phi_cnt = 1; | 701 uint phi_cnt = 1; |
700 uint i; | 702 uint i; |
701 for( i = 1; i<node_cnt; i++ ) { // Scan for Phi | 703 for( i = 1; i<node_cnt; i++ ) { // Scan for Phi |
702 Node *n = get_node(i); | 704 Node *n = block->get_node(i); |
703 if( n->is_Phi() || // Found a PhiNode or ParmNode | 705 if( n->is_Phi() || // Found a PhiNode or ParmNode |
704 (n->is_Proj() && n->in(0) == head()) ) { | 706 (n->is_Proj() && n->in(0) == block->head()) ) { |
705 // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt | 707 // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt |
706 _nodes.map(i,get_node(phi_cnt)); | 708 block->map_node(block->get_node(phi_cnt), i); |
707 _nodes.map(phi_cnt++,n); // swap Phi/Parm up front | 709 block->map_node(n, phi_cnt++); // swap Phi/Parm up front |
708 } else { // All others | 710 } else { // All others |
709 // Count block-local inputs to 'n' | 711 // Count block-local inputs to 'n' |
710 uint cnt = n->len(); // Input count | 712 uint cnt = n->len(); // Input count |
711 uint local = 0; | 713 uint local = 0; |
712 for( uint j=0; j<cnt; j++ ) { | 714 for( uint j=0; j<cnt; j++ ) { |
713 Node *m = n->in(j); | 715 Node *m = n->in(j); |
714 if( m && cfg->get_block_for_node(m) == this && !m->is_top() ) | 716 if( m && get_block_for_node(m) == block && !m->is_top() ) |
715 local++; // One more block-local input | 717 local++; // One more block-local input |
716 } | 718 } |
717 ready_cnt.at_put(n->_idx, local); // Count em up | 719 ready_cnt.at_put(n->_idx, local); // Count em up |
718 | 720 |
719 #ifdef ASSERT | 721 #ifdef ASSERT |
721 if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) { | 723 if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) { |
722 // Check the precedence edges | 724 // Check the precedence edges |
723 for (uint prec = n->req(); prec < n->len(); prec++) { | 725 for (uint prec = n->req(); prec < n->len(); prec++) { |
724 Node* oop_store = n->in(prec); | 726 Node* oop_store = n->in(prec); |
725 if (oop_store != NULL) { | 727 if (oop_store != NULL) { |
726 assert(cfg->get_block_for_node(oop_store)->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark"); | 728 assert(get_block_for_node(oop_store)->_dom_depth <= block->_dom_depth, "oop_store must dominate card-mark"); |
727 } | 729 } |
728 } | 730 } |
729 } | 731 } |
730 } | 732 } |
731 #endif | 733 #endif |
745 n->del_req(TypeFunc::Parms); | 747 n->del_req(TypeFunc::Parms); |
746 n->add_prec(x); | 748 n->add_prec(x); |
747 } | 749 } |
748 } | 750 } |
749 } | 751 } |
750 for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count | 752 for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count |
751 ready_cnt.at_put(get_node(i2)->_idx, 0); | 753 ready_cnt.at_put(block->get_node(i2)->_idx, 0); |
752 | 754 |
753 // All the prescheduled guys do not hold back internal nodes | 755 // All the prescheduled guys do not hold back internal nodes |
754 uint i3; | 756 uint i3; |
755 for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled | 757 for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled |
756 Node *n = get_node(i3); // Get pre-scheduled | 758 Node *n = block->get_node(i3); // Get pre-scheduled |
757 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | 759 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { |
758 Node* m = n->fast_out(j); | 760 Node* m = n->fast_out(j); |
759 if (cfg->get_block_for_node(m) == this) { // Local-block user | 761 if (get_block_for_node(m) == block) { // Local-block user |
760 int m_cnt = ready_cnt.at(m->_idx)-1; | 762 int m_cnt = ready_cnt.at(m->_idx)-1; |
761 ready_cnt.at_put(m->_idx, m_cnt); // Fix ready count | 763 ready_cnt.at_put(m->_idx, m_cnt); // Fix ready count |
762 } | 764 } |
763 } | 765 } |
764 } | 766 } |
765 | 767 |
766 Node_List delay; | 768 Node_List delay; |
767 // Make a worklist | 769 // Make a worklist |
768 Node_List worklist; | 770 Node_List worklist; |
769 for(uint i4=i3; i4<node_cnt; i4++ ) { // Put ready guys on worklist | 771 for(uint i4=i3; i4<node_cnt; i4++ ) { // Put ready guys on worklist |
770 Node *m = get_node(i4); | 772 Node *m = block->get_node(i4); |
771 if( !ready_cnt.at(m->_idx) ) { // Zero ready count? | 773 if( !ready_cnt.at(m->_idx) ) { // Zero ready count? |
772 if (m->is_iteratively_computed()) { | 774 if (m->is_iteratively_computed()) { |
773 // Push induction variable increments last to allow other uses | 775 // Push induction variable increments last to allow other uses |
774 // of the phi to be scheduled first. The select() method breaks | 776 // of the phi to be scheduled first. The select() method breaks |
775 // ties in scheduling by worklist order. | 777 // ties in scheduling by worklist order. |
787 Node* d = delay.pop(); | 789 Node* d = delay.pop(); |
788 worklist.push(d); | 790 worklist.push(d); |
789 } | 791 } |
790 | 792 |
791 // Warm up the 'next_call' heuristic bits | 793 // Warm up the 'next_call' heuristic bits |
792 needed_for_next_call(head(), next_call, cfg); | 794 needed_for_next_call(block, block->head(), next_call); |
793 | 795 |
794 #ifndef PRODUCT | 796 #ifndef PRODUCT |
795 if (cfg->trace_opto_pipelining()) { | 797 if (trace_opto_pipelining()) { |
796 for (uint j=0; j<_nodes.size(); j++) { | 798 for (uint j=0; j< block->number_of_nodes(); j++) { |
797 Node *n = get_node(j); | 799 Node *n = block->get_node(j); |
798 int idx = n->_idx; | 800 int idx = n->_idx; |
799 tty->print("# ready cnt:%3d ", ready_cnt.at(idx)); | 801 tty->print("# ready cnt:%3d ", ready_cnt.at(idx)); |
800 tty->print("latency:%3d ", cfg->get_latency_for_node(n)); | 802 tty->print("latency:%3d ", get_latency_for_node(n)); |
801 tty->print("%4d: %s\n", idx, n->Name()); | 803 tty->print("%4d: %s\n", idx, n->Name()); |
802 } | 804 } |
803 } | 805 } |
804 #endif | 806 #endif |
805 | 807 |
806 uint max_idx = (uint)ready_cnt.length(); | 808 uint max_idx = (uint)ready_cnt.length(); |
807 // Pull from worklist and schedule | 809 // Pull from worklist and schedule |
808 while( worklist.size() ) { // Worklist is not ready | 810 while( worklist.size() ) { // Worklist is not ready |
809 | 811 |
810 #ifndef PRODUCT | 812 #ifndef PRODUCT |
811 if (cfg->trace_opto_pipelining()) { | 813 if (trace_opto_pipelining()) { |
812 tty->print("# ready list:"); | 814 tty->print("# ready list:"); |
813 for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist | 815 for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist |
814 Node *n = worklist[i]; // Get Node on worklist | 816 Node *n = worklist[i]; // Get Node on worklist |
815 tty->print(" %d", n->_idx); | 817 tty->print(" %d", n->_idx); |
816 } | 818 } |
817 tty->cr(); | 819 tty->cr(); |
818 } | 820 } |
819 #endif | 821 #endif |
820 | 822 |
821 // Select and pop a ready guy from worklist | 823 // Select and pop a ready guy from worklist |
822 Node* n = select(cfg, worklist, ready_cnt, next_call, phi_cnt); | 824 Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt); |
823 _nodes.map(phi_cnt++,n); // Schedule him next | 825 block->map_node(n, phi_cnt++); // Schedule him next |
824 | 826 |
825 #ifndef PRODUCT | 827 #ifndef PRODUCT |
826 if (cfg->trace_opto_pipelining()) { | 828 if (trace_opto_pipelining()) { |
827 tty->print("# select %d: %s", n->_idx, n->Name()); | 829 tty->print("# select %d: %s", n->_idx, n->Name()); |
828 tty->print(", latency:%d", cfg->get_latency_for_node(n)); | 830 tty->print(", latency:%d", get_latency_for_node(n)); |
829 n->dump(); | 831 n->dump(); |
830 if (Verbose) { | 832 if (Verbose) { |
831 tty->print("# ready list:"); | 833 tty->print("# ready list:"); |
832 for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist | 834 for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist |
833 Node *n = worklist[i]; // Get Node on worklist | 835 Node *n = worklist[i]; // Get Node on worklist |
838 } | 840 } |
839 | 841 |
840 #endif | 842 #endif |
841 if( n->is_MachCall() ) { | 843 if( n->is_MachCall() ) { |
842 MachCallNode *mcall = n->as_MachCall(); | 844 MachCallNode *mcall = n->as_MachCall(); |
843 phi_cnt = sched_call(matcher, cfg, phi_cnt, worklist, ready_cnt, mcall, next_call); | 845 phi_cnt = sched_call(block, phi_cnt, worklist, ready_cnt, mcall, next_call); |
844 continue; | 846 continue; |
845 } | 847 } |
846 | 848 |
847 if (n->is_Mach() && n->as_Mach()->has_call()) { | 849 if (n->is_Mach() && n->as_Mach()->has_call()) { |
848 RegMask regs; | 850 RegMask regs; |
849 regs.Insert(matcher.c_frame_pointer()); | 851 regs.Insert(_matcher.c_frame_pointer()); |
850 regs.OR(n->out_RegMask()); | 852 regs.OR(n->out_RegMask()); |
851 | 853 |
852 MachProjNode *proj = new (matcher.C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj ); | 854 MachProjNode *proj = new (C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj ); |
853 cfg->map_node_to_block(proj, this); | 855 map_node_to_block(proj, block); |
854 insert_node(proj, phi_cnt++); | 856 block->insert_node(proj, phi_cnt++); |
855 | 857 |
856 add_call_kills(proj, regs, matcher._c_reg_save_policy, false); | 858 add_call_kills(proj, regs, _matcher._c_reg_save_policy, false); |
857 } | 859 } |
858 | 860 |
859 // Children are now all ready | 861 // Children are now all ready |
860 for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) { | 862 for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) { |
861 Node* m = n->fast_out(i5); // Get user | 863 Node* m = n->fast_out(i5); // Get user |
862 if (cfg->get_block_for_node(m) != this) { | 864 if (get_block_for_node(m) != block) { |
863 continue; | 865 continue; |
864 } | 866 } |
865 if( m->is_Phi() ) continue; | 867 if( m->is_Phi() ) continue; |
866 if (m->_idx >= max_idx) { // new node, skip it | 868 if (m->_idx >= max_idx) { // new node, skip it |
867 assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types"); | 869 assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types"); |
872 if( m_cnt == 0 ) | 874 if( m_cnt == 0 ) |
873 worklist.push(m); | 875 worklist.push(m); |
874 } | 876 } |
875 } | 877 } |
876 | 878 |
877 if( phi_cnt != end_idx() ) { | 879 if( phi_cnt != block->end_idx() ) { |
878 // did not schedule all. Retry, Bailout, or Die | 880 // did not schedule all. Retry, Bailout, or Die |
879 Compile* C = matcher.C; | |
880 if (C->subsume_loads() == true && !C->failing()) { | 881 if (C->subsume_loads() == true && !C->failing()) { |
881 // Retry with subsume_loads == false | 882 // Retry with subsume_loads == false |
882 // If this is the first failure, the sentinel string will "stick" | 883 // If this is the first failure, the sentinel string will "stick" |
883 // to the Compile object, and the C2Compiler will see it and retry. | 884 // to the Compile object, and the C2Compiler will see it and retry. |
884 C->record_failure(C2Compiler::retry_no_subsuming_loads()); | 885 C->record_failure(C2Compiler::retry_no_subsuming_loads()); |
886 // assert( phi_cnt == end_idx(), "did not schedule all" ); | 887 // assert( phi_cnt == end_idx(), "did not schedule all" ); |
887 return false; | 888 return false; |
888 } | 889 } |
889 | 890 |
890 #ifndef PRODUCT | 891 #ifndef PRODUCT |
891 if (cfg->trace_opto_pipelining()) { | 892 if (trace_opto_pipelining()) { |
892 tty->print_cr("#"); | 893 tty->print_cr("#"); |
893 tty->print_cr("# after schedule_local"); | 894 tty->print_cr("# after schedule_local"); |
894 for (uint i = 0;i < _nodes.size();i++) { | 895 for (uint i = 0;i < block->number_of_nodes();i++) { |
895 tty->print("# "); | 896 tty->print("# "); |
896 get_node(i)->fast_dump(); | 897 block->get_node(i)->fast_dump(); |
897 } | 898 } |
898 tty->cr(); | 899 tty->cr(); |
899 } | 900 } |
900 #endif | 901 #endif |
901 | 902 |
917 } | 918 } |
918 } | 919 } |
919 } | 920 } |
920 | 921 |
921 //------------------------------catch_cleanup_find_cloned_def------------------ | 922 //------------------------------catch_cleanup_find_cloned_def------------------ |
922 static Node *catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, PhaseCFG* cfg, int n_clone_idx) { | 923 Node* PhaseCFG::catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, int n_clone_idx) { |
923 assert( use_blk != def_blk, "Inter-block cleanup only"); | 924 assert( use_blk != def_blk, "Inter-block cleanup only"); |
924 | 925 |
925 // The use is some block below the Catch. Find and return the clone of the def | 926 // The use is some block below the Catch. Find and return the clone of the def |
926 // that dominates the use. If there is no clone in a dominating block, then | 927 // that dominates the use. If there is no clone in a dominating block, then |
927 // create a phi for the def in a dominating block. | 928 // create a phi for the def in a dominating block. |
943 if( j == def_blk->_num_succs ) { | 944 if( j == def_blk->_num_succs ) { |
944 // Block at same level in dom-tree is not a successor. It needs a | 945 // Block at same level in dom-tree is not a successor. It needs a |
945 // PhiNode, the PhiNode uses from the def and IT's uses need fixup. | 946 // PhiNode, the PhiNode uses from the def and IT's uses need fixup. |
946 Node_Array inputs = new Node_List(Thread::current()->resource_area()); | 947 Node_Array inputs = new Node_List(Thread::current()->resource_area()); |
947 for(uint k = 1; k < use_blk->num_preds(); k++) { | 948 for(uint k = 1; k < use_blk->num_preds(); k++) { |
948 Block* block = cfg->get_block_for_node(use_blk->pred(k)); | 949 Block* block = get_block_for_node(use_blk->pred(k)); |
949 inputs.map(k, catch_cleanup_find_cloned_def(block, def, def_blk, cfg, n_clone_idx)); | 950 inputs.map(k, catch_cleanup_find_cloned_def(block, def, def_blk, n_clone_idx)); |
950 } | 951 } |
951 | 952 |
952 // Check to see if the use_blk already has an identical phi inserted. | 953 // Check to see if the use_blk already has an identical phi inserted. |
953 // If it exists, it will be at the first position since all uses of a | 954 // If it exists, it will be at the first position since all uses of a |
954 // def are processed together. | 955 // def are processed together. |
966 | 967 |
967 // If an existing PhiNode was not found, make a new one. | 968 // If an existing PhiNode was not found, make a new one. |
968 if (fixup == NULL) { | 969 if (fixup == NULL) { |
969 Node *new_phi = PhiNode::make(use_blk->head(), def); | 970 Node *new_phi = PhiNode::make(use_blk->head(), def); |
970 use_blk->insert_node(new_phi, 1); | 971 use_blk->insert_node(new_phi, 1); |
971 cfg->map_node_to_block(new_phi, use_blk); | 972 map_node_to_block(new_phi, use_blk); |
972 for (uint k = 1; k < use_blk->num_preds(); k++) { | 973 for (uint k = 1; k < use_blk->num_preds(); k++) { |
973 new_phi->set_req(k, inputs[k]); | 974 new_phi->set_req(k, inputs[k]); |
974 } | 975 } |
975 fixup = new_phi; | 976 fixup = new_phi; |
976 } | 977 } |
1006 } | 1007 } |
1007 | 1008 |
1008 //------------------------------catch_cleanup_inter_block--------------------- | 1009 //------------------------------catch_cleanup_inter_block--------------------- |
1009 // Fix all input edges in use that reference "def". The use is in a different | 1010 // Fix all input edges in use that reference "def". The use is in a different |
1010 // block than the def. | 1011 // block than the def. |
1011 static void catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, PhaseCFG* cfg, int n_clone_idx) { | 1012 void PhaseCFG::catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, int n_clone_idx) { |
1012 if( !use_blk ) return; // Can happen if the use is a precedence edge | 1013 if( !use_blk ) return; // Can happen if the use is a precedence edge |
1013 | 1014 |
1014 Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, cfg, n_clone_idx); | 1015 Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, n_clone_idx); |
1015 catch_cleanup_fix_all_inputs(use, def, new_def); | 1016 catch_cleanup_fix_all_inputs(use, def, new_def); |
1016 } | 1017 } |
1017 | 1018 |
1018 //------------------------------call_catch_cleanup----------------------------- | 1019 //------------------------------call_catch_cleanup----------------------------- |
1019 // If we inserted any instructions between a Call and his CatchNode, | 1020 // If we inserted any instructions between a Call and his CatchNode, |
1020 // clone the instructions on all paths below the Catch. | 1021 // clone the instructions on all paths below the Catch. |
1021 void Block::call_catch_cleanup(PhaseCFG* cfg, Compile* C) { | 1022 void PhaseCFG::call_catch_cleanup(Block* block) { |
1022 | 1023 |
1023 // End of region to clone | 1024 // End of region to clone |
1024 uint end = end_idx(); | 1025 uint end = block->end_idx(); |
1025 if( !get_node(end)->is_Catch() ) return; | 1026 if( !block->get_node(end)->is_Catch() ) return; |
1026 // Start of region to clone | 1027 // Start of region to clone |
1027 uint beg = end; | 1028 uint beg = end; |
1028 while(!get_node(beg-1)->is_MachProj() || | 1029 while(!block->get_node(beg-1)->is_MachProj() || |
1029 !get_node(beg-1)->in(0)->is_MachCall() ) { | 1030 !block->get_node(beg-1)->in(0)->is_MachCall() ) { |
1030 beg--; | 1031 beg--; |
1031 assert(beg > 0,"Catch cleanup walking beyond block boundary"); | 1032 assert(beg > 0,"Catch cleanup walking beyond block boundary"); |
1032 } | 1033 } |
1033 // Range of inserted instructions is [beg, end) | 1034 // Range of inserted instructions is [beg, end) |
1034 if( beg == end ) return; | 1035 if( beg == end ) return; |
1035 | 1036 |
1036 // Clone along all Catch output paths. Clone area between the 'beg' and | 1037 // Clone along all Catch output paths. Clone area between the 'beg' and |
1037 // 'end' indices. | 1038 // 'end' indices. |
1038 for( uint i = 0; i < _num_succs; i++ ) { | 1039 for( uint i = 0; i < block->_num_succs; i++ ) { |
1039 Block *sb = _succs[i]; | 1040 Block *sb = block->_succs[i]; |
1040 // Clone the entire area; ignoring the edge fixup for now. | 1041 // Clone the entire area; ignoring the edge fixup for now. |
1041 for( uint j = end; j > beg; j-- ) { | 1042 for( uint j = end; j > beg; j-- ) { |
1042 // It is safe here to clone a node with anti_dependence | 1043 // It is safe here to clone a node with anti_dependence |
1043 // since clones dominate on each path. | 1044 // since clones dominate on each path. |
1044 Node *clone = get_node(j-1)->clone(); | 1045 Node *clone = block->get_node(j-1)->clone(); |
1045 sb->insert_node(clone, 1); | 1046 sb->insert_node(clone, 1); |
1046 cfg->map_node_to_block(clone, sb); | 1047 map_node_to_block(clone, sb); |
1047 } | 1048 } |
1048 } | 1049 } |
1049 | 1050 |
1050 | 1051 |
1051 // Fixup edges. Check the def-use info per cloned Node | 1052 // Fixup edges. Check the def-use info per cloned Node |
1052 for(uint i2 = beg; i2 < end; i2++ ) { | 1053 for(uint i2 = beg; i2 < end; i2++ ) { |
1053 uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block | 1054 uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block |
1054 Node *n = get_node(i2); // Node that got cloned | 1055 Node *n = block->get_node(i2); // Node that got cloned |
1055 // Need DU safe iterator because of edge manipulation in calls. | 1056 // Need DU safe iterator because of edge manipulation in calls. |
1056 Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area()); | 1057 Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area()); |
1057 for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) { | 1058 for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) { |
1058 out->push(n->fast_out(j1)); | 1059 out->push(n->fast_out(j1)); |
1059 } | 1060 } |
1060 uint max = out->size(); | 1061 uint max = out->size(); |
1061 for (uint j = 0; j < max; j++) {// For all users | 1062 for (uint j = 0; j < max; j++) {// For all users |
1062 Node *use = out->pop(); | 1063 Node *use = out->pop(); |
1063 Block *buse = cfg->get_block_for_node(use); | 1064 Block *buse = get_block_for_node(use); |
1064 if( use->is_Phi() ) { | 1065 if( use->is_Phi() ) { |
1065 for( uint k = 1; k < use->req(); k++ ) | 1066 for( uint k = 1; k < use->req(); k++ ) |
1066 if( use->in(k) == n ) { | 1067 if( use->in(k) == n ) { |
1067 Block* block = cfg->get_block_for_node(buse->pred(k)); | 1068 Block* b = get_block_for_node(buse->pred(k)); |
1068 Node *fixup = catch_cleanup_find_cloned_def(block, n, this, cfg, n_clone_idx); | 1069 Node *fixup = catch_cleanup_find_cloned_def(b, n, block, n_clone_idx); |
1069 use->set_req(k, fixup); | 1070 use->set_req(k, fixup); |
1070 } | 1071 } |
1071 } else { | 1072 } else { |
1072 if (this == buse) { | 1073 if (block == buse) { |
1073 catch_cleanup_intra_block(use, n, this, beg, n_clone_idx); | 1074 catch_cleanup_intra_block(use, n, block, beg, n_clone_idx); |
1074 } else { | 1075 } else { |
1075 catch_cleanup_inter_block(use, buse, n, this, cfg, n_clone_idx); | 1076 catch_cleanup_inter_block(use, buse, n, block, n_clone_idx); |
1076 } | 1077 } |
1077 } | 1078 } |
1078 } // End for all users | 1079 } // End for all users |
1079 | 1080 |
1080 } // End of for all Nodes in cloned area | 1081 } // End of for all Nodes in cloned area |
1081 | 1082 |
1082 // Remove the now-dead cloned ops | 1083 // Remove the now-dead cloned ops |
1083 for(uint i3 = beg; i3 < end; i3++ ) { | 1084 for(uint i3 = beg; i3 < end; i3++ ) { |
1084 get_node(beg)->disconnect_inputs(NULL, C); | 1085 block->get_node(beg)->disconnect_inputs(NULL, C); |
1085 remove_node(beg); | 1086 block->remove_node(beg); |
1086 } | 1087 } |
1087 | 1088 |
1088 // If the successor blocks have a CreateEx node, move it back to the top | 1089 // If the successor blocks have a CreateEx node, move it back to the top |
1089 for(uint i4 = 0; i4 < _num_succs; i4++ ) { | 1090 for(uint i4 = 0; i4 < block->_num_succs; i4++ ) { |
1090 Block *sb = _succs[i4]; | 1091 Block *sb = block->_succs[i4]; |
1091 uint new_cnt = end - beg; | 1092 uint new_cnt = end - beg; |
1092 // Remove any newly created, but dead, nodes. | 1093 // Remove any newly created, but dead, nodes. |
1093 for( uint j = new_cnt; j > 0; j-- ) { | 1094 for( uint j = new_cnt; j > 0; j-- ) { |
1094 Node *n = sb->get_node(j); | 1095 Node *n = sb->get_node(j); |
1095 if (n->outcnt() == 0 && | 1096 if (n->outcnt() == 0 && |