Mercurial > hg > truffle
comparison src/share/vm/opto/lcm.cpp @ 14422:2b8e28fdf503
Merge
author | kvn |
---|---|
date | Tue, 05 Nov 2013 17:38:04 -0800 |
parents | e2722a66aba7 c9ccd7b85f20 |
children | b0133e4187d3 |
comparison
equal
deleted
inserted
replaced
14421:3068270ba476 | 14422:2b8e28fdf503 |
---|---|
59 // with suitable memory ops nearby. Use the memory op to do the NULL check. | 59 // with suitable memory ops nearby. Use the memory op to do the NULL check. |
60 // I can generate a memory op if there is not one nearby. | 60 // I can generate a memory op if there is not one nearby. |
61 // The proj is the control projection for the not-null case. | 61 // The proj is the control projection for the not-null case. |
62 // The val is the pointer being checked for nullness or | 62 // The val is the pointer being checked for nullness or |
63 // decodeHeapOop_not_null node if it did not fold into address. | 63 // decodeHeapOop_not_null node if it did not fold into address. |
64 void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons) { | 64 void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allowed_reasons) { |
65 // Assume if null check need for 0 offset then always needed | 65 // Assume if null check need for 0 offset then always needed |
66 // Intel solaris doesn't support any null checks yet and no | 66 // Intel solaris doesn't support any null checks yet and no |
67 // mechanism exists (yet) to set the switches at an os_cpu level | 67 // mechanism exists (yet) to set the switches at an os_cpu level |
68 if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return; | 68 if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return; |
69 | 69 |
70 // Make sure the ptr-is-null path appears to be uncommon! | 70 // Make sure the ptr-is-null path appears to be uncommon! |
71 float f = end()->as_MachIf()->_prob; | 71 float f = block->end()->as_MachIf()->_prob; |
72 if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f; | 72 if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f; |
73 if( f > PROB_UNLIKELY_MAG(4) ) return; | 73 if( f > PROB_UNLIKELY_MAG(4) ) return; |
74 | 74 |
75 uint bidx = 0; // Capture index of value into memop | 75 uint bidx = 0; // Capture index of value into memop |
76 bool was_store; // Memory op is a store op | 76 bool was_store; // Memory op is a store op |
77 | 77 |
78 // Get the successor block for if the test ptr is non-null | 78 // Get the successor block for if the test ptr is non-null |
79 Block* not_null_block; // this one goes with the proj | 79 Block* not_null_block; // this one goes with the proj |
80 Block* null_block; | 80 Block* null_block; |
81 if (_nodes[_nodes.size()-1] == proj) { | 81 if (block->get_node(block->number_of_nodes()-1) == proj) { |
82 null_block = _succs[0]; | 82 null_block = block->_succs[0]; |
83 not_null_block = _succs[1]; | 83 not_null_block = block->_succs[1]; |
84 } else { | 84 } else { |
85 assert(_nodes[_nodes.size()-2] == proj, "proj is one or the other"); | 85 assert(block->get_node(block->number_of_nodes()-2) == proj, "proj is one or the other"); |
86 not_null_block = _succs[0]; | 86 not_null_block = block->_succs[0]; |
87 null_block = _succs[1]; | 87 null_block = block->_succs[1]; |
88 } | 88 } |
89 while (null_block->is_Empty() == Block::empty_with_goto) { | 89 while (null_block->is_Empty() == Block::empty_with_goto) { |
90 null_block = null_block->_succs[0]; | 90 null_block = null_block->_succs[0]; |
91 } | 91 } |
92 | 92 |
94 // (See Parse::do_if and Parse::do_ifnull for the reason | 94 // (See Parse::do_if and Parse::do_ifnull for the reason |
95 // we need an uncommon trap. Briefly, we need a way to | 95 // we need an uncommon trap. Briefly, we need a way to |
96 // detect failure of this optimization, as in 6366351.) | 96 // detect failure of this optimization, as in 6366351.) |
97 { | 97 { |
98 bool found_trap = false; | 98 bool found_trap = false; |
99 for (uint i1 = 0; i1 < null_block->_nodes.size(); i1++) { | 99 for (uint i1 = 0; i1 < null_block->number_of_nodes(); i1++) { |
100 Node* nn = null_block->_nodes[i1]; | 100 Node* nn = null_block->get_node(i1); |
101 if (nn->is_MachCall() && | 101 if (nn->is_MachCall() && |
102 nn->as_MachCall()->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) { | 102 nn->as_MachCall()->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) { |
103 const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type(); | 103 const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type(); |
104 if (trtype->isa_int() && trtype->is_int()->is_con()) { | 104 if (trtype->isa_int() && trtype->is_int()->is_con()) { |
105 jint tr_con = trtype->is_int()->get_con(); | 105 jint tr_con = trtype->is_int()->get_con(); |
 238 continue; // Give up if reference is beyond 4K page size | 238 continue; // Give up if reference is beyond 4K page size |
239 } | 239 } |
240 } | 240 } |
241 | 241 |
242 // Check ctrl input to see if the null-check dominates the memory op | 242 // Check ctrl input to see if the null-check dominates the memory op |
243 Block *cb = cfg->get_block_for_node(mach); | 243 Block *cb = get_block_for_node(mach); |
244 cb = cb->_idom; // Always hoist at least 1 block | 244 cb = cb->_idom; // Always hoist at least 1 block |
245 if( !was_store ) { // Stores can be hoisted only one block | 245 if( !was_store ) { // Stores can be hoisted only one block |
246 while( cb->_dom_depth > (_dom_depth + 1)) | 246 while( cb->_dom_depth > (block->_dom_depth + 1)) |
247 cb = cb->_idom; // Hoist loads as far as we want | 247 cb = cb->_idom; // Hoist loads as far as we want |
248 // The non-null-block should dominate the memory op, too. Live | 248 // The non-null-block should dominate the memory op, too. Live |
 249 // range spilling will insert a spill in the non-null-block if it | 249 // range spilling will insert a spill in the non-null-block if it |
250 // needs to spill the memory op for an implicit null check. | 250 // needs to spill the memory op for an implicit null check. |
251 if (cb->_dom_depth == (_dom_depth + 1)) { | 251 if (cb->_dom_depth == (block->_dom_depth + 1)) { |
252 if (cb != not_null_block) continue; | 252 if (cb != not_null_block) continue; |
253 cb = cb->_idom; | 253 cb = cb->_idom; |
254 } | 254 } |
255 } | 255 } |
256 if( cb != this ) continue; | 256 if( cb != block ) continue; |
257 | 257 |
258 // Found a memory user; see if it can be hoisted to check-block | 258 // Found a memory user; see if it can be hoisted to check-block |
259 uint vidx = 0; // Capture index of value into memop | 259 uint vidx = 0; // Capture index of value into memop |
260 uint j; | 260 uint j; |
261 for( j = mach->req()-1; j > 0; j-- ) { | 261 for( j = mach->req()-1; j > 0; j-- ) { |
263 vidx = j; | 263 vidx = j; |
264 // Ignore DecodeN val which could be hoisted to where needed. | 264 // Ignore DecodeN val which could be hoisted to where needed. |
265 if( is_decoden ) continue; | 265 if( is_decoden ) continue; |
266 } | 266 } |
267 // Block of memory-op input | 267 // Block of memory-op input |
268 Block *inb = cfg->get_block_for_node(mach->in(j)); | 268 Block *inb = get_block_for_node(mach->in(j)); |
 269 Block *b = this; // Start from null check | 269 Block *b = block; // Start from null check |
270 while( b != inb && b->_dom_depth > inb->_dom_depth ) | 270 while( b != inb && b->_dom_depth > inb->_dom_depth ) |
271 b = b->_idom; // search upwards for input | 271 b = b->_idom; // search upwards for input |
272 // See if input dominates null check | 272 // See if input dominates null check |
273 if( b != inb ) | 273 if( b != inb ) |
274 break; | 274 break; |
275 } | 275 } |
276 if( j > 0 ) | 276 if( j > 0 ) |
277 continue; | 277 continue; |
278 Block *mb = cfg->get_block_for_node(mach); | 278 Block *mb = get_block_for_node(mach); |
279 // Hoisting stores requires more checks for the anti-dependence case. | 279 // Hoisting stores requires more checks for the anti-dependence case. |
280 // Give up hoisting if we have to move the store past any load. | 280 // Give up hoisting if we have to move the store past any load. |
281 if( was_store ) { | 281 if( was_store ) { |
282 Block *b = mb; // Start searching here for a local load | 282 Block *b = mb; // Start searching here for a local load |
283 // mach use (faulting) trying to hoist | 283 // mach use (faulting) trying to hoist |
284 // n might be blocker to hoisting | 284 // n might be blocker to hoisting |
285 while( b != this ) { | 285 while( b != block ) { |
286 uint k; | 286 uint k; |
287 for( k = 1; k < b->_nodes.size(); k++ ) { | 287 for( k = 1; k < b->number_of_nodes(); k++ ) { |
288 Node *n = b->_nodes[k]; | 288 Node *n = b->get_node(k); |
289 if( n->needs_anti_dependence_check() && | 289 if( n->needs_anti_dependence_check() && |
290 n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) ) | 290 n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) ) |
291 break; // Found anti-dependent load | 291 break; // Found anti-dependent load |
292 } | 292 } |
293 if( k < b->_nodes.size() ) | 293 if( k < b->number_of_nodes() ) |
294 break; // Found anti-dependent load | 294 break; // Found anti-dependent load |
295 // Make sure control does not do a merge (would have to check allpaths) | 295 // Make sure control does not do a merge (would have to check allpaths) |
296 if( b->num_preds() != 2 ) break; | 296 if( b->num_preds() != 2 ) break; |
297 b = cfg->get_block_for_node(b->pred(1)); // Move up to predecessor block | 297 b = get_block_for_node(b->pred(1)); // Move up to predecessor block |
298 } | 298 } |
299 if( b != this ) continue; | 299 if( b != block ) continue; |
300 } | 300 } |
301 | 301 |
302 // Make sure this memory op is not already being used for a NullCheck | 302 // Make sure this memory op is not already being used for a NullCheck |
303 Node *e = mb->end(); | 303 Node *e = mb->end(); |
304 if( e->is_MachNullCheck() && e->in(1) == mach ) | 304 if( e->is_MachNullCheck() && e->in(1) == mach ) |
305 continue; // Already being used as a NULL check | 305 continue; // Already being used as a NULL check |
306 | 306 |
307 // Found a candidate! Pick one with least dom depth - the highest | 307 // Found a candidate! Pick one with least dom depth - the highest |
308 // in the dom tree should be closest to the null check. | 308 // in the dom tree should be closest to the null check. |
309 if (best == NULL || cfg->get_block_for_node(mach)->_dom_depth < cfg->get_block_for_node(best)->_dom_depth) { | 309 if (best == NULL || get_block_for_node(mach)->_dom_depth < get_block_for_node(best)->_dom_depth) { |
310 best = mach; | 310 best = mach; |
311 bidx = vidx; | 311 bidx = vidx; |
312 } | 312 } |
313 } | 313 } |
314 // No candidate! | 314 // No candidate! |
320 extern int implicit_null_checks; | 320 extern int implicit_null_checks; |
321 implicit_null_checks++; | 321 implicit_null_checks++; |
322 | 322 |
323 if( is_decoden ) { | 323 if( is_decoden ) { |
324 // Check if we need to hoist decodeHeapOop_not_null first. | 324 // Check if we need to hoist decodeHeapOop_not_null first. |
325 Block *valb = cfg->get_block_for_node(val); | 325 Block *valb = get_block_for_node(val); |
326 if( this != valb && this->_dom_depth < valb->_dom_depth ) { | 326 if( block != valb && block->_dom_depth < valb->_dom_depth ) { |
327 // Hoist it up to the end of the test block. | 327 // Hoist it up to the end of the test block. |
328 valb->find_remove(val); | 328 valb->find_remove(val); |
329 this->add_inst(val); | 329 block->add_inst(val); |
330 cfg->map_node_to_block(val, this); | 330 map_node_to_block(val, block); |
331 // DecodeN on x86 may kill flags. Check for flag-killing projections | 331 // DecodeN on x86 may kill flags. Check for flag-killing projections |
332 // that also need to be hoisted. | 332 // that also need to be hoisted. |
333 for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) { | 333 for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) { |
334 Node* n = val->fast_out(j); | 334 Node* n = val->fast_out(j); |
335 if( n->is_MachProj() ) { | 335 if( n->is_MachProj() ) { |
336 cfg->get_block_for_node(n)->find_remove(n); | 336 get_block_for_node(n)->find_remove(n); |
337 this->add_inst(n); | 337 block->add_inst(n); |
338 cfg->map_node_to_block(n, this); | 338 map_node_to_block(n, block); |
339 } | 339 } |
340 } | 340 } |
341 } | 341 } |
342 } | 342 } |
343 // Hoist the memory candidate up to the end of the test block. | 343 // Hoist the memory candidate up to the end of the test block. |
344 Block *old_block = cfg->get_block_for_node(best); | 344 Block *old_block = get_block_for_node(best); |
345 old_block->find_remove(best); | 345 old_block->find_remove(best); |
346 add_inst(best); | 346 block->add_inst(best); |
347 cfg->map_node_to_block(best, this); | 347 map_node_to_block(best, block); |
348 | 348 |
349 // Move the control dependence | 349 // Move the control dependence |
350 if (best->in(0) && best->in(0) == old_block->_nodes[0]) | 350 if (best->in(0) && best->in(0) == old_block->head()) |
351 best->set_req(0, _nodes[0]); | 351 best->set_req(0, block->head()); |
352 | 352 |
353 // Check for flag-killing projections that also need to be hoisted | 353 // Check for flag-killing projections that also need to be hoisted |
354 // Should be DU safe because no edge updates. | 354 // Should be DU safe because no edge updates. |
355 for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) { | 355 for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) { |
356 Node* n = best->fast_out(j); | 356 Node* n = best->fast_out(j); |
357 if( n->is_MachProj() ) { | 357 if( n->is_MachProj() ) { |
358 cfg->get_block_for_node(n)->find_remove(n); | 358 get_block_for_node(n)->find_remove(n); |
359 add_inst(n); | 359 block->add_inst(n); |
360 cfg->map_node_to_block(n, this); | 360 map_node_to_block(n, block); |
361 } | 361 } |
362 } | 362 } |
363 | 363 |
364 Compile *C = cfg->C; | |
365 // proj==Op_True --> ne test; proj==Op_False --> eq test. | 364 // proj==Op_True --> ne test; proj==Op_False --> eq test. |
366 // One of two graph shapes got matched: | 365 // One of two graph shapes got matched: |
367 // (IfTrue (If (Bool NE (CmpP ptr NULL)))) | 366 // (IfTrue (If (Bool NE (CmpP ptr NULL)))) |
368 // (IfFalse (If (Bool EQ (CmpP ptr NULL)))) | 367 // (IfFalse (If (Bool EQ (CmpP ptr NULL)))) |
369 // NULL checks are always branch-if-eq. If we see a IfTrue projection | 368 // NULL checks are always branch-if-eq. If we see a IfTrue projection |
370 // then we are replacing a 'ne' test with a 'eq' NULL check test. | 369 // then we are replacing a 'ne' test with a 'eq' NULL check test. |
371 // We need to flip the projections to keep the same semantics. | 370 // We need to flip the projections to keep the same semantics. |
372 if( proj->Opcode() == Op_IfTrue ) { | 371 if( proj->Opcode() == Op_IfTrue ) { |
373 // Swap order of projections in basic block to swap branch targets | 372 // Swap order of projections in basic block to swap branch targets |
374 Node *tmp1 = _nodes[end_idx()+1]; | 373 Node *tmp1 = block->get_node(block->end_idx()+1); |
375 Node *tmp2 = _nodes[end_idx()+2]; | 374 Node *tmp2 = block->get_node(block->end_idx()+2); |
376 _nodes.map(end_idx()+1, tmp2); | 375 block->map_node(tmp2, block->end_idx()+1); |
377 _nodes.map(end_idx()+2, tmp1); | 376 block->map_node(tmp1, block->end_idx()+2); |
378 Node *tmp = new (C) Node(C->top()); // Use not NULL input | 377 Node *tmp = new (C) Node(C->top()); // Use not NULL input |
379 tmp1->replace_by(tmp); | 378 tmp1->replace_by(tmp); |
380 tmp2->replace_by(tmp1); | 379 tmp2->replace_by(tmp1); |
381 tmp->replace_by(tmp2); | 380 tmp->replace_by(tmp2); |
382 tmp->destruct(); | 381 tmp->destruct(); |
385 // Remove the existing null check; use a new implicit null check instead. | 384 // Remove the existing null check; use a new implicit null check instead. |
386 // Since schedule-local needs precise def-use info, we need to correct | 385 // Since schedule-local needs precise def-use info, we need to correct |
387 // it as well. | 386 // it as well. |
388 Node *old_tst = proj->in(0); | 387 Node *old_tst = proj->in(0); |
389 MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx); | 388 MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx); |
390 _nodes.map(end_idx(),nul_chk); | 389 block->map_node(nul_chk, block->end_idx()); |
391 cfg->map_node_to_block(nul_chk, this); | 390 map_node_to_block(nul_chk, block); |
392 // Redirect users of old_test to nul_chk | 391 // Redirect users of old_test to nul_chk |
393 for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2) | 392 for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2) |
394 old_tst->last_out(i2)->set_req(0, nul_chk); | 393 old_tst->last_out(i2)->set_req(0, nul_chk); |
395 // Clean-up any dead code | 394 // Clean-up any dead code |
396 for (uint i3 = 0; i3 < old_tst->req(); i3++) | 395 for (uint i3 = 0; i3 < old_tst->req(); i3++) |
397 old_tst->set_req(i3, NULL); | 396 old_tst->set_req(i3, NULL); |
398 | 397 |
399 cfg->latency_from_uses(nul_chk); | 398 latency_from_uses(nul_chk); |
400 cfg->latency_from_uses(best); | 399 latency_from_uses(best); |
401 } | 400 } |
402 | 401 |
403 | 402 |
404 //------------------------------select----------------------------------------- | 403 //------------------------------select----------------------------------------- |
405 // Select a nice fellow from the worklist to schedule next. If there is only | 404 // Select a nice fellow from the worklist to schedule next. If there is only |
409 // These are chosen immediately. Some instructions are required to immediately | 408 // These are chosen immediately. Some instructions are required to immediately |
410 // precede the last instruction in the block, and these are taken last. Of the | 409 // precede the last instruction in the block, and these are taken last. Of the |
411 // remaining cases (most), choose the instruction with the greatest latency | 410 // remaining cases (most), choose the instruction with the greatest latency |
412 // (that is, the most number of pseudo-cycles required to the end of the | 411 // (that is, the most number of pseudo-cycles required to the end of the |
413 // routine). If there is a tie, choose the instruction with the most inputs. | 412 // routine). If there is a tie, choose the instruction with the most inputs. |
414 Node *Block::select(PhaseCFG *cfg, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) { | 413 Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) { |
415 | 414 |
416 // If only a single entry on the stack, use it | 415 // If only a single entry on the stack, use it |
417 uint cnt = worklist.size(); | 416 uint cnt = worklist.size(); |
418 if (cnt == 1) { | 417 if (cnt == 1) { |
419 Node *n = worklist[0]; | 418 Node *n = worklist[0]; |
443 worklist.map(i,worklist.pop()); | 442 worklist.map(i,worklist.pop()); |
444 return n; | 443 return n; |
445 } | 444 } |
446 | 445 |
447 // Final call in a block must be adjacent to 'catch' | 446 // Final call in a block must be adjacent to 'catch' |
448 Node *e = end(); | 447 Node *e = block->end(); |
449 if( e->is_Catch() && e->in(0)->in(0) == n ) | 448 if( e->is_Catch() && e->in(0)->in(0) == n ) |
450 continue; | 449 continue; |
451 | 450 |
452 // Memory op for an implicit null check has to be at the end of the block | 451 // Memory op for an implicit null check has to be at the end of the block |
453 if( e->is_MachNullCheck() && e->in(1) == n ) | 452 if( e->is_MachNullCheck() && e->in(1) == n ) |
469 | 468 |
470 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | 469 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { |
471 Node* use = n->fast_out(j); | 470 Node* use = n->fast_out(j); |
472 | 471 |
473 // The use is a conditional branch, make them adjacent | 472 // The use is a conditional branch, make them adjacent |
474 if (use->is_MachIf() && cfg->get_block_for_node(use) == this) { | 473 if (use->is_MachIf() && get_block_for_node(use) == block) { |
474 found_machif = true; | |
475 break; | |
476 } | |
477 | |
478 // For nodes that produce a FlagsProj, make the node adjacent to the | |
479 // use of the FlagsProj | |
480 if (use->is_FlagsProj() && get_block_for_node(use) == block) { | |
475 found_machif = true; | 481 found_machif = true; |
476 break; | 482 break; |
477 } | 483 } |
478 | 484 |
479 // More than this instruction pending for successor to be ready, | 485 // More than this instruction pending for successor to be ready, |
502 // MachTemps should be scheduled last so they are near their uses | 508 // MachTemps should be scheduled last so they are near their uses |
503 if (n->is_MachTemp()) { | 509 if (n->is_MachTemp()) { |
504 n_choice = 1; | 510 n_choice = 1; |
505 } | 511 } |
506 | 512 |
507 uint n_latency = cfg->get_latency_for_node(n); | 513 uint n_latency = get_latency_for_node(n); |
508 uint n_score = n->req(); // Many inputs get high score to break ties | 514 uint n_score = n->req(); // Many inputs get high score to break ties |
509 | 515 |
510 // Keep best latency found | 516 // Keep best latency found |
511 cand_cnt++; | 517 cand_cnt++; |
512 if (choice < n_choice || | 518 if (choice < n_choice || |
530 return n; | 536 return n; |
531 } | 537 } |
532 | 538 |
533 | 539 |
534 //------------------------------set_next_call---------------------------------- | 540 //------------------------------set_next_call---------------------------------- |
535 void Block::set_next_call( Node *n, VectorSet &next_call, PhaseCFG* cfg) { | 541 void PhaseCFG::set_next_call(Block* block, Node* n, VectorSet& next_call) { |
536 if( next_call.test_set(n->_idx) ) return; | 542 if( next_call.test_set(n->_idx) ) return; |
537 for( uint i=0; i<n->len(); i++ ) { | 543 for( uint i=0; i<n->len(); i++ ) { |
538 Node *m = n->in(i); | 544 Node *m = n->in(i); |
539 if( !m ) continue; // must see all nodes in block that precede call | 545 if( !m ) continue; // must see all nodes in block that precede call |
540 if (cfg->get_block_for_node(m) == this) { | 546 if (get_block_for_node(m) == block) { |
541 set_next_call(m, next_call, cfg); | 547 set_next_call(block, m, next_call); |
542 } | 548 } |
543 } | 549 } |
544 } | 550 } |
545 | 551 |
546 //------------------------------needed_for_next_call--------------------------- | 552 //------------------------------needed_for_next_call--------------------------- |
547 // Set the flag 'next_call' for each Node that is needed for the next call to | 553 // Set the flag 'next_call' for each Node that is needed for the next call to |
548 // be scheduled. This flag lets me bias scheduling so Nodes needed for the | 554 // be scheduled. This flag lets me bias scheduling so Nodes needed for the |
549 // next subroutine call get priority - basically it moves things NOT needed | 555 // next subroutine call get priority - basically it moves things NOT needed |
550 // for the next call till after the call. This prevents me from trying to | 556 // for the next call till after the call. This prevents me from trying to |
551 // carry lots of stuff live across a call. | 557 // carry lots of stuff live across a call. |
552 void Block::needed_for_next_call(Node *this_call, VectorSet &next_call, PhaseCFG* cfg) { | 558 void PhaseCFG::needed_for_next_call(Block* block, Node* this_call, VectorSet& next_call) { |
553 // Find the next control-defining Node in this block | 559 // Find the next control-defining Node in this block |
554 Node* call = NULL; | 560 Node* call = NULL; |
555 for (DUIterator_Fast imax, i = this_call->fast_outs(imax); i < imax; i++) { | 561 for (DUIterator_Fast imax, i = this_call->fast_outs(imax); i < imax; i++) { |
556 Node* m = this_call->fast_out(i); | 562 Node* m = this_call->fast_out(i); |
557 if(cfg->get_block_for_node(m) == this && // Local-block user | 563 if (get_block_for_node(m) == block && // Local-block user |
558 m != this_call && // Not self-start node | 564 m != this_call && // Not self-start node |
559 m->is_MachCall() ) | 565 m->is_MachCall()) { |
560 call = m; | 566 call = m; |
561 break; | 567 break; |
568 } | |
562 } | 569 } |
563 if (call == NULL) return; // No next call (e.g., block end is near) | 570 if (call == NULL) return; // No next call (e.g., block end is near) |
564 // Set next-call for all inputs to this call | 571 // Set next-call for all inputs to this call |
565 set_next_call(call, next_call, cfg); | 572 set_next_call(block, call, next_call); |
566 } | 573 } |
567 | 574 |
568 //------------------------------add_call_kills------------------------------------- | 575 //------------------------------add_call_kills------------------------------------- |
569 void Block::add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe) { | 576 // helper function that adds caller save registers to MachProjNode |
577 static void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe) { | |
570 // Fill in the kill mask for the call | 578 // Fill in the kill mask for the call |
571 for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) { | 579 for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) { |
572 if( !regs.Member(r) ) { // Not already defined by the call | 580 if( !regs.Member(r) ) { // Not already defined by the call |
573 // Save-on-call register? | 581 // Save-on-call register? |
574 if ((save_policy[r] == 'C') || | 582 if ((save_policy[r] == 'C') || |
580 } | 588 } |
581 } | 589 } |
582 | 590 |
583 | 591 |
584 //------------------------------sched_call------------------------------------- | 592 //------------------------------sched_call------------------------------------- |
585 uint Block::sched_call( Matcher &matcher, PhaseCFG* cfg, uint node_cnt, Node_List &worklist, GrowableArray<int> &ready_cnt, MachCallNode *mcall, VectorSet &next_call ) { | 593 uint PhaseCFG::sched_call(Block* block, uint node_cnt, Node_List& worklist, GrowableArray<int>& ready_cnt, MachCallNode* mcall, VectorSet& next_call) { |
586 RegMask regs; | 594 RegMask regs; |
587 | 595 |
588 // Schedule all the users of the call right now. All the users are | 596 // Schedule all the users of the call right now. All the users are |
589 // projection Nodes, so they must be scheduled next to the call. | 597 // projection Nodes, so they must be scheduled next to the call. |
590 // Collect all the defined registers. | 598 // Collect all the defined registers. |
593 assert( n->is_MachProj(), "" ); | 601 assert( n->is_MachProj(), "" ); |
594 int n_cnt = ready_cnt.at(n->_idx)-1; | 602 int n_cnt = ready_cnt.at(n->_idx)-1; |
595 ready_cnt.at_put(n->_idx, n_cnt); | 603 ready_cnt.at_put(n->_idx, n_cnt); |
596 assert( n_cnt == 0, "" ); | 604 assert( n_cnt == 0, "" ); |
597 // Schedule next to call | 605 // Schedule next to call |
598 _nodes.map(node_cnt++, n); | 606 block->map_node(n, node_cnt++); |
599 // Collect defined registers | 607 // Collect defined registers |
600 regs.OR(n->out_RegMask()); | 608 regs.OR(n->out_RegMask()); |
601 // Check for scheduling the next control-definer | 609 // Check for scheduling the next control-definer |
602 if( n->bottom_type() == Type::CONTROL ) | 610 if( n->bottom_type() == Type::CONTROL ) |
603 // Warm up next pile of heuristic bits | 611 // Warm up next pile of heuristic bits |
604 needed_for_next_call(n, next_call, cfg); | 612 needed_for_next_call(block, n, next_call); |
605 | 613 |
606 // Children of projections are now all ready | 614 // Children of projections are now all ready |
607 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | 615 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { |
608 Node* m = n->fast_out(j); // Get user | 616 Node* m = n->fast_out(j); // Get user |
609 if(cfg->get_block_for_node(m) != this) { | 617 if(get_block_for_node(m) != block) { |
610 continue; | 618 continue; |
611 } | 619 } |
612 if( m->is_Phi() ) continue; | 620 if( m->is_Phi() ) continue; |
613 int m_cnt = ready_cnt.at(m->_idx)-1; | 621 int m_cnt = ready_cnt.at(m->_idx)-1; |
614 ready_cnt.at_put(m->_idx, m_cnt); | 622 ready_cnt.at_put(m->_idx, m_cnt); |
618 | 626 |
619 } | 627 } |
620 | 628 |
621 // Act as if the call defines the Frame Pointer. | 629 // Act as if the call defines the Frame Pointer. |
622 // Certainly the FP is alive and well after the call. | 630 // Certainly the FP is alive and well after the call. |
623 regs.Insert(matcher.c_frame_pointer()); | 631 regs.Insert(_matcher.c_frame_pointer()); |
624 | 632 |
625 // Set all registers killed and not already defined by the call. | 633 // Set all registers killed and not already defined by the call. |
626 uint r_cnt = mcall->tf()->range()->cnt(); | 634 uint r_cnt = mcall->tf()->range()->cnt(); |
627 int op = mcall->ideal_Opcode(); | 635 int op = mcall->ideal_Opcode(); |
628 MachProjNode *proj = new (matcher.C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj ); | 636 MachProjNode *proj = new (C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj ); |
629 cfg->map_node_to_block(proj, this); | 637 map_node_to_block(proj, block); |
630 _nodes.insert(node_cnt++, proj); | 638 block->insert_node(proj, node_cnt++); |
631 | 639 |
632 // Select the right register save policy. | 640 // Select the right register save policy. |
633 const char * save_policy; | 641 const char * save_policy; |
634 switch (op) { | 642 switch (op) { |
635 case Op_CallRuntime: | 643 case Op_CallRuntime: |
636 case Op_CallLeaf: | 644 case Op_CallLeaf: |
637 case Op_CallLeafNoFP: | 645 case Op_CallLeafNoFP: |
638 // Calling C code so use C calling convention | 646 // Calling C code so use C calling convention |
639 save_policy = matcher._c_reg_save_policy; | 647 save_policy = _matcher._c_reg_save_policy; |
640 break; | 648 break; |
641 | 649 |
642 case Op_CallStaticJava: | 650 case Op_CallStaticJava: |
643 case Op_CallDynamicJava: | 651 case Op_CallDynamicJava: |
644 // Calling Java code so use Java calling convention | 652 // Calling Java code so use Java calling convention |
645 save_policy = matcher._register_save_policy; | 653 save_policy = _matcher._register_save_policy; |
646 break; | 654 break; |
647 | 655 |
648 default: | 656 default: |
649 ShouldNotReachHere(); | 657 ShouldNotReachHere(); |
650 } | 658 } |
675 } | 683 } |
676 | 684 |
677 | 685 |
678 //------------------------------schedule_local--------------------------------- | 686 //------------------------------schedule_local--------------------------------- |
679 // Topological sort within a block. Someday become a real scheduler. | 687 // Topological sort within a block. Someday become a real scheduler. |
680 bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, GrowableArray<int> &ready_cnt, VectorSet &next_call) { | 688 bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call) { |
681 // Already "sorted" are the block start Node (as the first entry), and | 689 // Already "sorted" are the block start Node (as the first entry), and |
682 // the block-ending Node and any trailing control projections. We leave | 690 // the block-ending Node and any trailing control projections. We leave |
683 // these alone. PhiNodes and ParmNodes are made to follow the block start | 691 // these alone. PhiNodes and ParmNodes are made to follow the block start |
684 // Node. Everything else gets topo-sorted. | 692 // Node. Everything else gets topo-sorted. |
685 | 693 |
686 #ifndef PRODUCT | 694 #ifndef PRODUCT |
687 if (cfg->trace_opto_pipelining()) { | 695 if (trace_opto_pipelining()) { |
688 tty->print_cr("# --- schedule_local B%d, before: ---", _pre_order); | 696 tty->print_cr("# --- schedule_local B%d, before: ---", block->_pre_order); |
689 for (uint i = 0;i < _nodes.size();i++) { | 697 for (uint i = 0;i < block->number_of_nodes(); i++) { |
690 tty->print("# "); | 698 tty->print("# "); |
691 _nodes[i]->fast_dump(); | 699 block->get_node(i)->fast_dump(); |
692 } | 700 } |
693 tty->print_cr("#"); | 701 tty->print_cr("#"); |
694 } | 702 } |
695 #endif | 703 #endif |
696 | 704 |
697 // RootNode is already sorted | 705 // RootNode is already sorted |
698 if( _nodes.size() == 1 ) return true; | 706 if (block->number_of_nodes() == 1) { |
707 return true; | |
708 } | |
699 | 709 |
700 // Move PhiNodes and ParmNodes from 1 to cnt up to the start | 710 // Move PhiNodes and ParmNodes from 1 to cnt up to the start |
701 uint node_cnt = end_idx(); | 711 uint node_cnt = block->end_idx(); |
702 uint phi_cnt = 1; | 712 uint phi_cnt = 1; |
703 uint i; | 713 uint i; |
704 for( i = 1; i<node_cnt; i++ ) { // Scan for Phi | 714 for( i = 1; i<node_cnt; i++ ) { // Scan for Phi |
705 Node *n = _nodes[i]; | 715 Node *n = block->get_node(i); |
706 if( n->is_Phi() || // Found a PhiNode or ParmNode | 716 if( n->is_Phi() || // Found a PhiNode or ParmNode |
707 (n->is_Proj() && n->in(0) == head()) ) { | 717 (n->is_Proj() && n->in(0) == block->head()) ) { |
708 // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt | 718 // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt |
709 _nodes.map(i,_nodes[phi_cnt]); | 719 block->map_node(block->get_node(phi_cnt), i); |
710 _nodes.map(phi_cnt++,n); // swap Phi/Parm up front | 720 block->map_node(n, phi_cnt++); // swap Phi/Parm up front |
711 } else { // All others | 721 } else { // All others |
712 // Count block-local inputs to 'n' | 722 // Count block-local inputs to 'n' |
713 uint cnt = n->len(); // Input count | 723 uint cnt = n->len(); // Input count |
714 uint local = 0; | 724 uint local = 0; |
715 for( uint j=0; j<cnt; j++ ) { | 725 for( uint j=0; j<cnt; j++ ) { |
716 Node *m = n->in(j); | 726 Node *m = n->in(j); |
717 if( m && cfg->get_block_for_node(m) == this && !m->is_top() ) | 727 if( m && get_block_for_node(m) == block && !m->is_top() ) |
718 local++; // One more block-local input | 728 local++; // One more block-local input |
719 } | 729 } |
720 ready_cnt.at_put(n->_idx, local); // Count em up | 730 ready_cnt.at_put(n->_idx, local); // Count em up |
721 | 731 |
722 #ifdef ASSERT | 732 #ifdef ASSERT |
724 if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) { | 734 if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) { |
725 // Check the precedence edges | 735 // Check the precedence edges |
726 for (uint prec = n->req(); prec < n->len(); prec++) { | 736 for (uint prec = n->req(); prec < n->len(); prec++) { |
727 Node* oop_store = n->in(prec); | 737 Node* oop_store = n->in(prec); |
728 if (oop_store != NULL) { | 738 if (oop_store != NULL) { |
729 assert(cfg->get_block_for_node(oop_store)->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark"); | 739 assert(get_block_for_node(oop_store)->_dom_depth <= block->_dom_depth, "oop_store must dominate card-mark"); |
730 } | 740 } |
731 } | 741 } |
732 } | 742 } |
733 } | 743 } |
734 #endif | 744 #endif |
748 n->del_req(TypeFunc::Parms); | 758 n->del_req(TypeFunc::Parms); |
749 n->add_prec(x); | 759 n->add_prec(x); |
750 } | 760 } |
751 } | 761 } |
752 } | 762 } |
753 for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count | 763 for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count |
754 ready_cnt.at_put(_nodes[i2]->_idx, 0); | 764 ready_cnt.at_put(block->get_node(i2)->_idx, 0); |
755 | 765 |
756 // All the prescheduled guys do not hold back internal nodes | 766 // All the prescheduled guys do not hold back internal nodes |
757 uint i3; | 767 uint i3; |
758 for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled | 768 for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled |
759 Node *n = _nodes[i3]; // Get pre-scheduled | 769 Node *n = block->get_node(i3); // Get pre-scheduled |
760 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { | 770 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { |
761 Node* m = n->fast_out(j); | 771 Node* m = n->fast_out(j); |
762 if (cfg->get_block_for_node(m) == this) { // Local-block user | 772 if (get_block_for_node(m) == block) { // Local-block user |
763 int m_cnt = ready_cnt.at(m->_idx)-1; | 773 int m_cnt = ready_cnt.at(m->_idx)-1; |
764 ready_cnt.at_put(m->_idx, m_cnt); // Fix ready count | 774 ready_cnt.at_put(m->_idx, m_cnt); // Fix ready count |
765 } | 775 } |
766 } | 776 } |
767 } | 777 } |
768 | 778 |
769 Node_List delay; | 779 Node_List delay; |
770 // Make a worklist | 780 // Make a worklist |
771 Node_List worklist; | 781 Node_List worklist; |
772 for(uint i4=i3; i4<node_cnt; i4++ ) { // Put ready guys on worklist | 782 for(uint i4=i3; i4<node_cnt; i4++ ) { // Put ready guys on worklist |
773 Node *m = _nodes[i4]; | 783 Node *m = block->get_node(i4); |
774 if( !ready_cnt.at(m->_idx) ) { // Zero ready count? | 784 if( !ready_cnt.at(m->_idx) ) { // Zero ready count? |
775 if (m->is_iteratively_computed()) { | 785 if (m->is_iteratively_computed()) { |
776 // Push induction variable increments last to allow other uses | 786 // Push induction variable increments last to allow other uses |
777 // of the phi to be scheduled first. The select() method breaks | 787 // of the phi to be scheduled first. The select() method breaks |
778 // ties in scheduling by worklist order. | 788 // ties in scheduling by worklist order. |
790 Node* d = delay.pop(); | 800 Node* d = delay.pop(); |
791 worklist.push(d); | 801 worklist.push(d); |
792 } | 802 } |
793 | 803 |
794 // Warm up the 'next_call' heuristic bits | 804 // Warm up the 'next_call' heuristic bits |
795 needed_for_next_call(_nodes[0], next_call, cfg); | 805 needed_for_next_call(block, block->head(), next_call); |
796 | 806 |
797 #ifndef PRODUCT | 807 #ifndef PRODUCT |
798 if (cfg->trace_opto_pipelining()) { | 808 if (trace_opto_pipelining()) { |
799 for (uint j=0; j<_nodes.size(); j++) { | 809 for (uint j=0; j< block->number_of_nodes(); j++) { |
800 Node *n = _nodes[j]; | 810 Node *n = block->get_node(j); |
801 int idx = n->_idx; | 811 int idx = n->_idx; |
802 tty->print("# ready cnt:%3d ", ready_cnt.at(idx)); | 812 tty->print("# ready cnt:%3d ", ready_cnt.at(idx)); |
803 tty->print("latency:%3d ", cfg->get_latency_for_node(n)); | 813 tty->print("latency:%3d ", get_latency_for_node(n)); |
804 tty->print("%4d: %s\n", idx, n->Name()); | 814 tty->print("%4d: %s\n", idx, n->Name()); |
805 } | 815 } |
806 } | 816 } |
807 #endif | 817 #endif |
808 | 818 |
809 uint max_idx = (uint)ready_cnt.length(); | 819 uint max_idx = (uint)ready_cnt.length(); |
810 // Pull from worklist and schedule | 820 // Pull from worklist and schedule |
811 while( worklist.size() ) { // Worklist is not ready | 821 while( worklist.size() ) { // Worklist is not ready |
812 | 822 |
813 #ifndef PRODUCT | 823 #ifndef PRODUCT |
814 if (cfg->trace_opto_pipelining()) { | 824 if (trace_opto_pipelining()) { |
815 tty->print("# ready list:"); | 825 tty->print("# ready list:"); |
816 for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist | 826 for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist |
817 Node *n = worklist[i]; // Get Node on worklist | 827 Node *n = worklist[i]; // Get Node on worklist |
818 tty->print(" %d", n->_idx); | 828 tty->print(" %d", n->_idx); |
819 } | 829 } |
820 tty->cr(); | 830 tty->cr(); |
821 } | 831 } |
822 #endif | 832 #endif |
823 | 833 |
824 // Select and pop a ready guy from worklist | 834 // Select and pop a ready guy from worklist |
825 Node* n = select(cfg, worklist, ready_cnt, next_call, phi_cnt); | 835 Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt); |
826 _nodes.map(phi_cnt++,n); // Schedule him next | 836 block->map_node(n, phi_cnt++); // Schedule him next |
827 | 837 |
828 #ifndef PRODUCT | 838 #ifndef PRODUCT |
829 if (cfg->trace_opto_pipelining()) { | 839 if (trace_opto_pipelining()) { |
830 tty->print("# select %d: %s", n->_idx, n->Name()); | 840 tty->print("# select %d: %s", n->_idx, n->Name()); |
831 tty->print(", latency:%d", cfg->get_latency_for_node(n)); | 841 tty->print(", latency:%d", get_latency_for_node(n)); |
832 n->dump(); | 842 n->dump(); |
833 if (Verbose) { | 843 if (Verbose) { |
834 tty->print("# ready list:"); | 844 tty->print("# ready list:"); |
835 for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist | 845 for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist |
836 Node *n = worklist[i]; // Get Node on worklist | 846 Node *n = worklist[i]; // Get Node on worklist |
841 } | 851 } |
842 | 852 |
843 #endif | 853 #endif |
844 if( n->is_MachCall() ) { | 854 if( n->is_MachCall() ) { |
845 MachCallNode *mcall = n->as_MachCall(); | 855 MachCallNode *mcall = n->as_MachCall(); |
846 phi_cnt = sched_call(matcher, cfg, phi_cnt, worklist, ready_cnt, mcall, next_call); | 856 phi_cnt = sched_call(block, phi_cnt, worklist, ready_cnt, mcall, next_call); |
847 continue; | 857 continue; |
848 } | 858 } |
849 | 859 |
850 if (n->is_Mach() && n->as_Mach()->has_call()) { | 860 if (n->is_Mach() && n->as_Mach()->has_call()) { |
851 RegMask regs; | 861 RegMask regs; |
852 regs.Insert(matcher.c_frame_pointer()); | 862 regs.Insert(_matcher.c_frame_pointer()); |
853 regs.OR(n->out_RegMask()); | 863 regs.OR(n->out_RegMask()); |
854 | 864 |
855 MachProjNode *proj = new (matcher.C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj ); | 865 MachProjNode *proj = new (C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj ); |
856 cfg->map_node_to_block(proj, this); | 866 map_node_to_block(proj, block); |
857 _nodes.insert(phi_cnt++, proj); | 867 block->insert_node(proj, phi_cnt++); |
858 | 868 |
859 add_call_kills(proj, regs, matcher._c_reg_save_policy, false); | 869 add_call_kills(proj, regs, _matcher._c_reg_save_policy, false); |
860 } | 870 } |
861 | 871 |
862 // Children are now all ready | 872 // Children are now all ready |
863 for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) { | 873 for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) { |
864 Node* m = n->fast_out(i5); // Get user | 874 Node* m = n->fast_out(i5); // Get user |
865 if (cfg->get_block_for_node(m) != this) { | 875 if (get_block_for_node(m) != block) { |
866 continue; | 876 continue; |
867 } | 877 } |
868 if( m->is_Phi() ) continue; | 878 if( m->is_Phi() ) continue; |
869 if (m->_idx >= max_idx) { // new node, skip it | 879 if (m->_idx >= max_idx) { // new node, skip it |
870 assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types"); | 880 assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types"); |
875 if( m_cnt == 0 ) | 885 if( m_cnt == 0 ) |
876 worklist.push(m); | 886 worklist.push(m); |
877 } | 887 } |
878 } | 888 } |
879 | 889 |
880 if( phi_cnt != end_idx() ) { | 890 if( phi_cnt != block->end_idx() ) { |
881 // did not schedule all. Retry, Bailout, or Die | 891 // did not schedule all. Retry, Bailout, or Die |
882 Compile* C = matcher.C; | |
883 if (C->subsume_loads() == true && !C->failing()) { | 892 if (C->subsume_loads() == true && !C->failing()) { |
884 // Retry with subsume_loads == false | 893 // Retry with subsume_loads == false |
885 // If this is the first failure, the sentinel string will "stick" | 894 // If this is the first failure, the sentinel string will "stick" |
886 // to the Compile object, and the C2Compiler will see it and retry. | 895 // to the Compile object, and the C2Compiler will see it and retry. |
887 C->record_failure(C2Compiler::retry_no_subsuming_loads()); | 896 C->record_failure(C2Compiler::retry_no_subsuming_loads()); |
889 // assert( phi_cnt == end_idx(), "did not schedule all" ); | 898 // assert( phi_cnt == end_idx(), "did not schedule all" ); |
890 return false; | 899 return false; |
891 } | 900 } |
892 | 901 |
893 #ifndef PRODUCT | 902 #ifndef PRODUCT |
894 if (cfg->trace_opto_pipelining()) { | 903 if (trace_opto_pipelining()) { |
895 tty->print_cr("#"); | 904 tty->print_cr("#"); |
896 tty->print_cr("# after schedule_local"); | 905 tty->print_cr("# after schedule_local"); |
897 for (uint i = 0;i < _nodes.size();i++) { | 906 for (uint i = 0;i < block->number_of_nodes();i++) { |
898 tty->print("# "); | 907 tty->print("# "); |
899 _nodes[i]->fast_dump(); | 908 block->get_node(i)->fast_dump(); |
900 } | 909 } |
901 tty->cr(); | 910 tty->cr(); |
902 } | 911 } |
903 #endif | 912 #endif |
904 | 913 |
920 } | 929 } |
921 } | 930 } |
922 } | 931 } |
923 | 932 |
924 //------------------------------catch_cleanup_find_cloned_def------------------ | 933 //------------------------------catch_cleanup_find_cloned_def------------------ |
925 static Node *catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, PhaseCFG* cfg, int n_clone_idx) { | 934 Node* PhaseCFG::catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, int n_clone_idx) { |
926 assert( use_blk != def_blk, "Inter-block cleanup only"); | 935 assert( use_blk != def_blk, "Inter-block cleanup only"); |
927 | 936 |
928 // The use is some block below the Catch. Find and return the clone of the def | 937 // The use is some block below the Catch. Find and return the clone of the def |
929 // that dominates the use. If there is no clone in a dominating block, then | 938 // that dominates the use. If there is no clone in a dominating block, then |
930 // create a phi for the def in a dominating block. | 939 // create a phi for the def in a dominating block. |
946 if( j == def_blk->_num_succs ) { | 955 if( j == def_blk->_num_succs ) { |
947 // Block at same level in dom-tree is not a successor. It needs a | 956 // Block at same level in dom-tree is not a successor. It needs a |
948 // PhiNode, the PhiNode uses from the def and IT's uses need fixup. | 957 // PhiNode, the PhiNode uses from the def and IT's uses need fixup. |
949 Node_Array inputs = new Node_List(Thread::current()->resource_area()); | 958 Node_Array inputs = new Node_List(Thread::current()->resource_area()); |
950 for(uint k = 1; k < use_blk->num_preds(); k++) { | 959 for(uint k = 1; k < use_blk->num_preds(); k++) { |
951 Block* block = cfg->get_block_for_node(use_blk->pred(k)); | 960 Block* block = get_block_for_node(use_blk->pred(k)); |
952 inputs.map(k, catch_cleanup_find_cloned_def(block, def, def_blk, cfg, n_clone_idx)); | 961 inputs.map(k, catch_cleanup_find_cloned_def(block, def, def_blk, n_clone_idx)); |
953 } | 962 } |
954 | 963 |
955 // Check to see if the use_blk already has an identical phi inserted. | 964 // Check to see if the use_blk already has an identical phi inserted. |
956 // If it exists, it will be at the first position since all uses of a | 965 // If it exists, it will be at the first position since all uses of a |
957 // def are processed together. | 966 // def are processed together. |
958 Node *phi = use_blk->_nodes[1]; | 967 Node *phi = use_blk->get_node(1); |
959 if( phi->is_Phi() ) { | 968 if( phi->is_Phi() ) { |
960 fixup = phi; | 969 fixup = phi; |
961 for (uint k = 1; k < use_blk->num_preds(); k++) { | 970 for (uint k = 1; k < use_blk->num_preds(); k++) { |
962 if (phi->in(k) != inputs[k]) { | 971 if (phi->in(k) != inputs[k]) { |
963 // Not a match | 972 // Not a match |
968 } | 977 } |
969 | 978 |
970 // If an existing PhiNode was not found, make a new one. | 979 // If an existing PhiNode was not found, make a new one. |
971 if (fixup == NULL) { | 980 if (fixup == NULL) { |
972 Node *new_phi = PhiNode::make(use_blk->head(), def); | 981 Node *new_phi = PhiNode::make(use_blk->head(), def); |
973 use_blk->_nodes.insert(1, new_phi); | 982 use_blk->insert_node(new_phi, 1); |
974 cfg->map_node_to_block(new_phi, use_blk); | 983 map_node_to_block(new_phi, use_blk); |
975 for (uint k = 1; k < use_blk->num_preds(); k++) { | 984 for (uint k = 1; k < use_blk->num_preds(); k++) { |
976 new_phi->set_req(k, inputs[k]); | 985 new_phi->set_req(k, inputs[k]); |
977 } | 986 } |
978 fixup = new_phi; | 987 fixup = new_phi; |
979 } | 988 } |
980 | 989 |
981 } else { | 990 } else { |
982 // Found the use just below the Catch. Make it use the clone. | 991 // Found the use just below the Catch. Make it use the clone. |
983 fixup = use_blk->_nodes[n_clone_idx]; | 992 fixup = use_blk->get_node(n_clone_idx); |
984 } | 993 } |
985 | 994 |
986 return fixup; | 995 return fixup; |
987 } | 996 } |
988 | 997 |
998 uint use_idx = blk->find_node(use); | 1007 uint use_idx = blk->find_node(use); |
999 uint offset_idx = use_idx - beg; | 1008 uint offset_idx = use_idx - beg; |
1000 for( uint k = 0; k < blk->_num_succs; k++ ) { | 1009 for( uint k = 0; k < blk->_num_succs; k++ ) { |
1001 // Get clone in each successor block | 1010 // Get clone in each successor block |
1002 Block *sb = blk->_succs[k]; | 1011 Block *sb = blk->_succs[k]; |
1003 Node *clone = sb->_nodes[offset_idx+1]; | 1012 Node *clone = sb->get_node(offset_idx+1); |
1004 assert( clone->Opcode() == use->Opcode(), "" ); | 1013 assert( clone->Opcode() == use->Opcode(), "" ); |
1005 | 1014 |
1006 // Make use-clone reference the def-clone | 1015 // Make use-clone reference the def-clone |
1007 catch_cleanup_fix_all_inputs(clone, def, sb->_nodes[n_clone_idx]); | 1016 catch_cleanup_fix_all_inputs(clone, def, sb->get_node(n_clone_idx)); |
1008 } | 1017 } |
1009 } | 1018 } |
1010 | 1019 |
1011 //------------------------------catch_cleanup_inter_block--------------------- | 1020 //------------------------------catch_cleanup_inter_block--------------------- |
1012 // Fix all input edges in use that reference "def". The use is in a different | 1021 // Fix all input edges in use that reference "def". The use is in a different |
1013 // block than the def. | 1022 // block than the def. |
1014 static void catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, PhaseCFG* cfg, int n_clone_idx) { | 1023 void PhaseCFG::catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, int n_clone_idx) { |
1015 if( !use_blk ) return; // Can happen if the use is a precedence edge | 1024 if( !use_blk ) return; // Can happen if the use is a precedence edge |
1016 | 1025 |
1017 Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, cfg, n_clone_idx); | 1026 Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, n_clone_idx); |
1018 catch_cleanup_fix_all_inputs(use, def, new_def); | 1027 catch_cleanup_fix_all_inputs(use, def, new_def); |
1019 } | 1028 } |
1020 | 1029 |
1021 //------------------------------call_catch_cleanup----------------------------- | 1030 //------------------------------call_catch_cleanup----------------------------- |
1022 // If we inserted any instructions between a Call and his CatchNode, | 1031 // If we inserted any instructions between a Call and his CatchNode, |
1023 // clone the instructions on all paths below the Catch. | 1032 // clone the instructions on all paths below the Catch. |
1024 void Block::call_catch_cleanup(PhaseCFG* cfg, Compile* C) { | 1033 void PhaseCFG::call_catch_cleanup(Block* block) { |
1025 | 1034 |
1026 // End of region to clone | 1035 // End of region to clone |
1027 uint end = end_idx(); | 1036 uint end = block->end_idx(); |
1028 if( !_nodes[end]->is_Catch() ) return; | 1037 if( !block->get_node(end)->is_Catch() ) return; |
1029 // Start of region to clone | 1038 // Start of region to clone |
1030 uint beg = end; | 1039 uint beg = end; |
1031 while(!_nodes[beg-1]->is_MachProj() || | 1040 while(!block->get_node(beg-1)->is_MachProj() || |
1032 !_nodes[beg-1]->in(0)->is_MachCall() ) { | 1041 !block->get_node(beg-1)->in(0)->is_MachCall() ) { |
1033 beg--; | 1042 beg--; |
1034 assert(beg > 0,"Catch cleanup walking beyond block boundary"); | 1043 assert(beg > 0,"Catch cleanup walking beyond block boundary"); |
1035 } | 1044 } |
1036 // Range of inserted instructions is [beg, end) | 1045 // Range of inserted instructions is [beg, end) |
1037 if( beg == end ) return; | 1046 if( beg == end ) return; |
1038 | 1047 |
1039 // Clone along all Catch output paths. Clone area between the 'beg' and | 1048 // Clone along all Catch output paths. Clone area between the 'beg' and |
1040 // 'end' indices. | 1049 // 'end' indices. |
1041 for( uint i = 0; i < _num_succs; i++ ) { | 1050 for( uint i = 0; i < block->_num_succs; i++ ) { |
1042 Block *sb = _succs[i]; | 1051 Block *sb = block->_succs[i]; |
1043 // Clone the entire area; ignoring the edge fixup for now. | 1052 // Clone the entire area; ignoring the edge fixup for now. |
1044 for( uint j = end; j > beg; j-- ) { | 1053 for( uint j = end; j > beg; j-- ) { |
1045 // It is safe here to clone a node with anti_dependence | 1054 // It is safe here to clone a node with anti_dependence |
1046 // since clones dominate on each path. | 1055 // since clones dominate on each path. |
1047 Node *clone = _nodes[j-1]->clone(); | 1056 Node *clone = block->get_node(j-1)->clone(); |
1048 sb->_nodes.insert( 1, clone ); | 1057 sb->insert_node(clone, 1); |
1049 cfg->map_node_to_block(clone, sb); | 1058 map_node_to_block(clone, sb); |
1050 } | 1059 } |
1051 } | 1060 } |
1052 | 1061 |
1053 | 1062 |
1054 // Fixup edges. Check the def-use info per cloned Node | 1063 // Fixup edges. Check the def-use info per cloned Node |
1055 for(uint i2 = beg; i2 < end; i2++ ) { | 1064 for(uint i2 = beg; i2 < end; i2++ ) { |
1056 uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block | 1065 uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block |
1057 Node *n = _nodes[i2]; // Node that got cloned | 1066 Node *n = block->get_node(i2); // Node that got cloned |
1058 // Need DU safe iterator because of edge manipulation in calls. | 1067 // Need DU safe iterator because of edge manipulation in calls. |
1059 Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area()); | 1068 Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area()); |
1060 for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) { | 1069 for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) { |
1061 out->push(n->fast_out(j1)); | 1070 out->push(n->fast_out(j1)); |
1062 } | 1071 } |
1063 uint max = out->size(); | 1072 uint max = out->size(); |
1064 for (uint j = 0; j < max; j++) {// For all users | 1073 for (uint j = 0; j < max; j++) {// For all users |
1065 Node *use = out->pop(); | 1074 Node *use = out->pop(); |
1066 Block *buse = cfg->get_block_for_node(use); | 1075 Block *buse = get_block_for_node(use); |
1067 if( use->is_Phi() ) { | 1076 if( use->is_Phi() ) { |
1068 for( uint k = 1; k < use->req(); k++ ) | 1077 for( uint k = 1; k < use->req(); k++ ) |
1069 if( use->in(k) == n ) { | 1078 if( use->in(k) == n ) { |
1070 Block* block = cfg->get_block_for_node(buse->pred(k)); | 1079 Block* b = get_block_for_node(buse->pred(k)); |
1071 Node *fixup = catch_cleanup_find_cloned_def(block, n, this, cfg, n_clone_idx); | 1080 Node *fixup = catch_cleanup_find_cloned_def(b, n, block, n_clone_idx); |
1072 use->set_req(k, fixup); | 1081 use->set_req(k, fixup); |
1073 } | 1082 } |
1074 } else { | 1083 } else { |
1075 if (this == buse) { | 1084 if (block == buse) { |
1076 catch_cleanup_intra_block(use, n, this, beg, n_clone_idx); | 1085 catch_cleanup_intra_block(use, n, block, beg, n_clone_idx); |
1077 } else { | 1086 } else { |
1078 catch_cleanup_inter_block(use, buse, n, this, cfg, n_clone_idx); | 1087 catch_cleanup_inter_block(use, buse, n, block, n_clone_idx); |
1079 } | 1088 } |
1080 } | 1089 } |
1081 } // End for all users | 1090 } // End for all users |
1082 | 1091 |
1083 } // End of for all Nodes in cloned area | 1092 } // End of for all Nodes in cloned area |
1084 | 1093 |
1085 // Remove the now-dead cloned ops | 1094 // Remove the now-dead cloned ops |
1086 for(uint i3 = beg; i3 < end; i3++ ) { | 1095 for(uint i3 = beg; i3 < end; i3++ ) { |
1087 _nodes[beg]->disconnect_inputs(NULL, C); | 1096 block->get_node(beg)->disconnect_inputs(NULL, C); |
1088 _nodes.remove(beg); | 1097 block->remove_node(beg); |
1089 } | 1098 } |
1090 | 1099 |
1091 // If the successor blocks have a CreateEx node, move it back to the top | 1100 // If the successor blocks have a CreateEx node, move it back to the top |
1092 for(uint i4 = 0; i4 < _num_succs; i4++ ) { | 1101 for(uint i4 = 0; i4 < block->_num_succs; i4++ ) { |
1093 Block *sb = _succs[i4]; | 1102 Block *sb = block->_succs[i4]; |
1094 uint new_cnt = end - beg; | 1103 uint new_cnt = end - beg; |
1095 // Remove any newly created, but dead, nodes. | 1104 // Remove any newly created, but dead, nodes. |
1096 for( uint j = new_cnt; j > 0; j-- ) { | 1105 for( uint j = new_cnt; j > 0; j-- ) { |
1097 Node *n = sb->_nodes[j]; | 1106 Node *n = sb->get_node(j); |
1098 if (n->outcnt() == 0 && | 1107 if (n->outcnt() == 0 && |
1099 (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){ | 1108 (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){ |
1100 n->disconnect_inputs(NULL, C); | 1109 n->disconnect_inputs(NULL, C); |
1101 sb->_nodes.remove(j); | 1110 sb->remove_node(j); |
1102 new_cnt--; | 1111 new_cnt--; |
1103 } | 1112 } |
1104 } | 1113 } |
1105 // If any newly created nodes remain, move the CreateEx node to the top | 1114 // If any newly created nodes remain, move the CreateEx node to the top |
1106 if (new_cnt > 0) { | 1115 if (new_cnt > 0) { |
1107 Node *cex = sb->_nodes[1+new_cnt]; | 1116 Node *cex = sb->get_node(1+new_cnt); |
1108 if( cex->is_Mach() && cex->as_Mach()->ideal_Opcode() == Op_CreateEx ) { | 1117 if( cex->is_Mach() && cex->as_Mach()->ideal_Opcode() == Op_CreateEx ) { |
1109 sb->_nodes.remove(1+new_cnt); | 1118 sb->remove_node(1+new_cnt); |
1110 sb->_nodes.insert(1,cex); | 1119 sb->insert_node(cex, 1); |
1111 } | 1120 } |
1112 } | 1121 } |
1113 } | 1122 } |
1114 } | 1123 } |