Mercurial > hg > graal-compiler
comparison src/share/vm/opto/output.cpp @ 14412:e2722a66aba7
Merge
author | kvn |
---|---|
date | Thu, 05 Sep 2013 11:04:39 -0700 |
parents | 75ef1a499665 adb9a7d94cb5 |
children | 2b8e28fdf503 |
comparison
equal
deleted
inserted
replaced
14411:bdd155477289 | 14412:e2722a66aba7 |
---|---|
52 #endif | 52 #endif |
53 | 53 |
54 extern int emit_exception_handler(CodeBuffer &cbuf); | 54 extern int emit_exception_handler(CodeBuffer &cbuf); |
55 extern int emit_deopt_handler(CodeBuffer &cbuf); | 55 extern int emit_deopt_handler(CodeBuffer &cbuf); |
56 | 56 |
57 //------------------------------Output----------------------------------------- | |
58 // Convert Nodes to instruction bits and pass off to the VM | 57 // Convert Nodes to instruction bits and pass off to the VM |
59 void Compile::Output() { | 58 void Compile::Output() { |
60 // RootNode goes | 59 // RootNode goes |
61 assert( _cfg->_broot->_nodes.size() == 0, "" ); | 60 assert( _cfg->get_root_block()->_nodes.size() == 0, "" ); |
62 | 61 |
63 // The number of new nodes (mostly MachNop) is proportional to | 62 // The number of new nodes (mostly MachNop) is proportional to |
64 // the number of java calls and inner loops which are aligned. | 63 // the number of java calls and inner loops which are aligned. |
65 if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 + | 64 if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 + |
66 C->inner_loops()*(OptoLoopAlignment-1)), | 65 C->inner_loops()*(OptoLoopAlignment-1)), |
67 "out of nodes before code generation" ) ) { | 66 "out of nodes before code generation" ) ) { |
68 return; | 67 return; |
69 } | 68 } |
70 // Make sure I can find the Start Node | 69 // Make sure I can find the Start Node |
71 Block_Array& bbs = _cfg->_bbs; | 70 Block *entry = _cfg->get_block(1); |
72 Block *entry = _cfg->_blocks[1]; | 71 Block *broot = _cfg->get_root_block(); |
73 Block *broot = _cfg->_broot; | |
74 | 72 |
75 const StartNode *start = entry->_nodes[0]->as_Start(); | 73 const StartNode *start = entry->_nodes[0]->as_Start(); |
76 | 74 |
77 // Replace StartNode with prolog | 75 // Replace StartNode with prolog |
78 MachPrologNode *prolog = new (this) MachPrologNode(); | 76 MachPrologNode *prolog = new (this) MachPrologNode(); |
79 entry->_nodes.map( 0, prolog ); | 77 entry->_nodes.map( 0, prolog ); |
80 bbs.map( prolog->_idx, entry ); | 78 _cfg->map_node_to_block(prolog, entry); |
81 bbs.map( start->_idx, NULL ); // start is no longer in any block | 79 _cfg->unmap_node_from_block(start); // start is no longer in any block |
82 | 80 |
83 // Virtual methods need an unverified entry point | 81 // Virtual methods need an unverified entry point |
84 | 82 |
85 if( is_osr_compilation() ) { | 83 if( is_osr_compilation() ) { |
86 if( PoisonOSREntry ) { | 84 if( PoisonOSREntry ) { |
108 // runtime stubs or frame converters | 106 // runtime stubs or frame converters |
109 _cfg->insert( entry, 1, new (this) MachBreakpointNode() ); | 107 _cfg->insert( entry, 1, new (this) MachBreakpointNode() ); |
110 } | 108 } |
111 | 109 |
112 // Insert epilogs before every return | 110 // Insert epilogs before every return |
113 for( uint i=0; i<_cfg->_num_blocks; i++ ) { | 111 for (uint i = 0; i < _cfg->number_of_blocks(); i++) { |
114 Block *b = _cfg->_blocks[i]; | 112 Block* block = _cfg->get_block(i); |
115 if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point? | 113 if (!block->is_connector() && block->non_connector_successor(0) == _cfg->get_root_block()) { // Found a program exit point? |
116 Node *m = b->end(); | 114 Node* m = block->end(); |
117 if( m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt ) { | 115 if (m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt) { |
118 MachEpilogNode *epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return); | 116 MachEpilogNode* epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return); |
119 b->add_inst( epilog ); | 117 block->add_inst(epilog); |
120 bbs.map(epilog->_idx, b); | 118 _cfg->map_node_to_block(epilog, block); |
121 //_regalloc->set_bad(epilog->_idx); // Already initialized this way. | |
122 } | 119 } |
123 } | 120 } |
124 } | 121 } |
125 | 122 |
126 # ifdef ENABLE_ZAP_DEAD_LOCALS | 123 # ifdef ENABLE_ZAP_DEAD_LOCALS |
127 if ( ZapDeadCompiledLocals ) Insert_zap_nodes(); | 124 if (ZapDeadCompiledLocals) { |
125 Insert_zap_nodes(); | |
126 } | |
128 # endif | 127 # endif |
129 | 128 |
130 uint* blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1); | 129 uint* blk_starts = NEW_RESOURCE_ARRAY(uint, _cfg->number_of_blocks() + 1); |
131 blk_starts[0] = 0; | 130 blk_starts[0] = 0; |
132 | 131 |
133 // Initialize code buffer and process short branches. | 132 // Initialize code buffer and process short branches. |
134 CodeBuffer* cb = init_buffer(blk_starts); | 133 CodeBuffer* cb = init_buffer(blk_starts); |
135 | 134 |
136 if (cb == NULL || failing()) return; | 135 if (cb == NULL || failing()) { |
136 return; | |
137 } | |
137 | 138 |
138 ScheduleAndBundle(); | 139 ScheduleAndBundle(); |
139 | 140 |
140 #ifndef PRODUCT | 141 #ifndef PRODUCT |
141 if (trace_opto_output()) { | 142 if (trace_opto_output()) { |
142 tty->print("\n---- After ScheduleAndBundle ----\n"); | 143 tty->print("\n---- After ScheduleAndBundle ----\n"); |
143 for (uint i = 0; i < _cfg->_num_blocks; i++) { | 144 for (uint i = 0; i < _cfg->number_of_blocks(); i++) { |
144 tty->print("\nBB#%03d:\n", i); | 145 tty->print("\nBB#%03d:\n", i); |
145 Block *bb = _cfg->_blocks[i]; | 146 Block* block = _cfg->get_block(i); |
146 for (uint j = 0; j < bb->_nodes.size(); j++) { | 147 for (uint j = 0; j < block->_nodes.size(); j++) { |
147 Node *n = bb->_nodes[j]; | 148 Node* n = block->_nodes[j]; |
148 OptoReg::Name reg = _regalloc->get_reg_first(n); | 149 OptoReg::Name reg = _regalloc->get_reg_first(n); |
149 tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : ""); | 150 tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : ""); |
150 n->dump(); | 151 n->dump(); |
151 } | 152 } |
152 } | 153 } |
153 } | 154 } |
154 #endif | 155 #endif |
155 | 156 |
156 if (failing()) return; | 157 if (failing()) { |
158 return; | |
159 } | |
157 | 160 |
158 BuildOopMaps(); | 161 BuildOopMaps(); |
159 | 162 |
160 if (failing()) return; | 163 if (failing()) { |
164 return; | |
165 } | |
161 | 166 |
162 fill_buffer(cb, blk_starts); | 167 fill_buffer(cb, blk_starts); |
163 } | 168 } |
164 | 169 |
165 bool Compile::need_stack_bang(int frame_size_in_bytes) const { | 170 bool Compile::need_stack_bang(int frame_size_in_bytes) const { |
217 | 222 |
218 if ( _method == NULL ) | 223 if ( _method == NULL ) |
219 return; // no safepoints/oopmaps emitted for calls in stubs,so we don't care | 224 return; // no safepoints/oopmaps emitted for calls in stubs,so we don't care |
220 | 225 |
221 // Insert call to zap runtime stub before every node with an oop map | 226 // Insert call to zap runtime stub before every node with an oop map |
222 for( uint i=0; i<_cfg->_num_blocks; i++ ) { | 227 for( uint i=0; i<_cfg->number_of_blocks(); i++ ) { |
223 Block *b = _cfg->_blocks[i]; | 228 Block *b = _cfg->get_block(i); |
224 for ( uint j = 0; j < b->_nodes.size(); ++j ) { | 229 for ( uint j = 0; j < b->_nodes.size(); ++j ) { |
225 Node *n = b->_nodes[j]; | 230 Node *n = b->_nodes[j]; |
226 | 231 |
227 // Determining if we should insert a zap-a-lot node in output. | 232 // Determining if we should insert a zap-a-lot node in output. |
228 // We do that for all nodes that has oopmap info, except for calls | 233 // We do that for all nodes that has oopmap info, except for calls |
250 } | 255 } |
251 } | 256 } |
252 if (insert) { | 257 if (insert) { |
253 Node *zap = call_zap_node(n->as_MachSafePoint(), i); | 258 Node *zap = call_zap_node(n->as_MachSafePoint(), i); |
254 b->_nodes.insert( j, zap ); | 259 b->_nodes.insert( j, zap ); |
255 _cfg->_bbs.map( zap->_idx, b ); | 260 _cfg->map_node_to_block(zap, b); |
256 ++j; | 261 ++j; |
257 } | 262 } |
258 } | 263 } |
259 } | 264 } |
260 } | 265 } |
275 // Add the cloned OopMap to the zap node | 280 // Add the cloned OopMap to the zap node |
276 ideal_node->set_oop_map(clone); | 281 ideal_node->set_oop_map(clone); |
277 return _matcher->match_sfpt(ideal_node); | 282 return _matcher->match_sfpt(ideal_node); |
278 } | 283 } |
279 | 284 |
280 //------------------------------is_node_getting_a_safepoint-------------------- | |
281 bool Compile::is_node_getting_a_safepoint( Node* n) { | 285 bool Compile::is_node_getting_a_safepoint( Node* n) { |
282 // This code duplicates the logic prior to the call of add_safepoint | 286 // This code duplicates the logic prior to the call of add_safepoint |
283 // below in this file. | 287 // below in this file. |
284 if( n->is_MachSafePoint() ) return true; | 288 if( n->is_MachSafePoint() ) return true; |
285 return false; | 289 return false; |
286 } | 290 } |
287 | 291 |
288 # endif // ENABLE_ZAP_DEAD_LOCALS | 292 # endif // ENABLE_ZAP_DEAD_LOCALS |
289 | 293 |
290 //------------------------------compute_loop_first_inst_sizes------------------ | |
291 // Compute the size of first NumberOfLoopInstrToAlign instructions at the top | 294 // Compute the size of first NumberOfLoopInstrToAlign instructions at the top |
292 // of a loop. When aligning a loop we need to provide enough instructions | 295 // of a loop. When aligning a loop we need to provide enough instructions |
293 // in cpu's fetch buffer to feed decoders. The loop alignment could be | 296 // in cpu's fetch buffer to feed decoders. The loop alignment could be |
294 // avoided if we have enough instructions in fetch buffer at the head of a loop. | 297 // avoided if we have enough instructions in fetch buffer at the head of a loop. |
295 // By default, the size is set to 999999 by Block's constructor so that | 298 // By default, the size is set to 999999 by Block's constructor so that |
302 // The next condition is used to gate the loop alignment optimization. | 305 // The next condition is used to gate the loop alignment optimization. |
303 // Don't aligned a loop if there are enough instructions at the head of a loop | 306 // Don't aligned a loop if there are enough instructions at the head of a loop |
304 // or alignment padding is larger then MaxLoopPad. By default, MaxLoopPad | 307 // or alignment padding is larger then MaxLoopPad. By default, MaxLoopPad |
305 // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is | 308 // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is |
306 // equal to 11 bytes which is the largest address NOP instruction. | 309 // equal to 11 bytes which is the largest address NOP instruction. |
307 if( MaxLoopPad < OptoLoopAlignment-1 ) { | 310 if (MaxLoopPad < OptoLoopAlignment - 1) { |
308 uint last_block = _cfg->_num_blocks-1; | 311 uint last_block = _cfg->number_of_blocks() - 1; |
309 for( uint i=1; i <= last_block; i++ ) { | 312 for (uint i = 1; i <= last_block; i++) { |
310 Block *b = _cfg->_blocks[i]; | 313 Block* block = _cfg->get_block(i); |
311 // Check the first loop's block which requires an alignment. | 314 // Check the first loop's block which requires an alignment. |
312 if( b->loop_alignment() > (uint)relocInfo::addr_unit() ) { | 315 if (block->loop_alignment() > (uint)relocInfo::addr_unit()) { |
313 uint sum_size = 0; | 316 uint sum_size = 0; |
314 uint inst_cnt = NumberOfLoopInstrToAlign; | 317 uint inst_cnt = NumberOfLoopInstrToAlign; |
315 inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt, _regalloc); | 318 inst_cnt = block->compute_first_inst_size(sum_size, inst_cnt, _regalloc); |
316 | 319 |
317 // Check subsequent fallthrough blocks if the loop's first | 320 // Check subsequent fallthrough blocks if the loop's first |
318 // block(s) does not have enough instructions. | 321 // block(s) does not have enough instructions. |
319 Block *nb = b; | 322 Block *nb = block; |
320 while( inst_cnt > 0 && | 323 while(inst_cnt > 0 && |
321 i < last_block && | 324 i < last_block && |
322 !_cfg->_blocks[i+1]->has_loop_alignment() && | 325 !_cfg->get_block(i + 1)->has_loop_alignment() && |
323 !nb->has_successor(b) ) { | 326 !nb->has_successor(block)) { |
324 i++; | 327 i++; |
325 nb = _cfg->_blocks[i]; | 328 nb = _cfg->get_block(i); |
326 inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc); | 329 inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc); |
327 } // while( inst_cnt > 0 && i < last_block ) | 330 } // while( inst_cnt > 0 && i < last_block ) |
328 | 331 |
329 b->set_first_inst_size(sum_size); | 332 block->set_first_inst_size(sum_size); |
330 } // f( b->head()->is_Loop() ) | 333 } // f( b->head()->is_Loop() ) |
331 } // for( i <= last_block ) | 334 } // for( i <= last_block ) |
332 } // if( MaxLoopPad < OptoLoopAlignment-1 ) | 335 } // if( MaxLoopPad < OptoLoopAlignment-1 ) |
333 } | 336 } |
334 | 337 |
335 //----------------------shorten_branches--------------------------------------- | |
336 // The architecture description provides short branch variants for some long | 338 // The architecture description provides short branch variants for some long |
337 // branch instructions. Replace eligible long branches with short branches. | 339 // branch instructions. Replace eligible long branches with short branches. |
338 void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) { | 340 void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) { |
339 | |
340 // ------------------ | |
341 // Compute size of each block, method size, and relocation information size | 341 // Compute size of each block, method size, and relocation information size |
342 uint nblocks = _cfg->_num_blocks; | 342 uint nblocks = _cfg->number_of_blocks(); |
343 | 343 |
344 uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks); | 344 uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks); |
345 uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks); | 345 uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks); |
346 int* jmp_nidx = NEW_RESOURCE_ARRAY(int ,nblocks); | 346 int* jmp_nidx = NEW_RESOURCE_ARRAY(int ,nblocks); |
347 DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks); ) | 347 DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks); ) |
364 // Step one, perform a pessimistic sizing pass. | 364 // Step one, perform a pessimistic sizing pass. |
365 uint last_call_adr = max_uint; | 365 uint last_call_adr = max_uint; |
366 uint last_avoid_back_to_back_adr = max_uint; | 366 uint last_avoid_back_to_back_adr = max_uint; |
367 uint nop_size = (new (this) MachNopNode())->size(_regalloc); | 367 uint nop_size = (new (this) MachNopNode())->size(_regalloc); |
368 for (uint i = 0; i < nblocks; i++) { // For all blocks | 368 for (uint i = 0; i < nblocks; i++) { // For all blocks |
369 Block *b = _cfg->_blocks[i]; | 369 Block* block = _cfg->get_block(i); |
370 | 370 |
371 // During short branch replacement, we store the relative (to blk_starts) | 371 // During short branch replacement, we store the relative (to blk_starts) |
372 // offset of jump in jmp_offset, rather than the absolute offset of jump. | 372 // offset of jump in jmp_offset, rather than the absolute offset of jump. |
373 // This is so that we do not need to recompute sizes of all nodes when | 373 // This is so that we do not need to recompute sizes of all nodes when |
374 // we compute correct blk_starts in our next sizing pass. | 374 // we compute correct blk_starts in our next sizing pass. |
377 jmp_nidx[i] = -1; | 377 jmp_nidx[i] = -1; |
378 DEBUG_ONLY( jmp_target[i] = 0; ) | 378 DEBUG_ONLY( jmp_target[i] = 0; ) |
379 DEBUG_ONLY( jmp_rule[i] = 0; ) | 379 DEBUG_ONLY( jmp_rule[i] = 0; ) |
380 | 380 |
381 // Sum all instruction sizes to compute block size | 381 // Sum all instruction sizes to compute block size |
382 uint last_inst = b->_nodes.size(); | 382 uint last_inst = block->_nodes.size(); |
383 uint blk_size = 0; | 383 uint blk_size = 0; |
384 for (uint j = 0; j < last_inst; j++) { | 384 for (uint j = 0; j < last_inst; j++) { |
385 Node* nj = b->_nodes[j]; | 385 Node* nj = block->_nodes[j]; |
386 // Handle machine instruction nodes | 386 // Handle machine instruction nodes |
387 if (nj->is_Mach()) { | 387 if (nj->is_Mach()) { |
388 MachNode *mach = nj->as_Mach(); | 388 MachNode *mach = nj->as_Mach(); |
389 blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding | 389 blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding |
390 reloc_size += mach->reloc(); | 390 reloc_size += mach->reloc(); |
441 } | 441 } |
442 | 442 |
443 // When the next block starts a loop, we may insert pad NOP | 443 // When the next block starts a loop, we may insert pad NOP |
444 // instructions. Since we cannot know our future alignment, | 444 // instructions. Since we cannot know our future alignment, |
445 // assume the worst. | 445 // assume the worst. |
446 if (i< nblocks-1) { | 446 if (i < nblocks - 1) { |
447 Block *nb = _cfg->_blocks[i+1]; | 447 Block* nb = _cfg->get_block(i + 1); |
448 int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit(); | 448 int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit(); |
449 if (max_loop_pad > 0) { | 449 if (max_loop_pad > 0) { |
450 assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), ""); | 450 assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), ""); |
451 // Adjust last_call_adr and/or last_avoid_back_to_back_adr. | 451 // Adjust last_call_adr and/or last_avoid_back_to_back_adr. |
452 // If either is the last instruction in this block, bump by | 452 // If either is the last instruction in this block, bump by |
473 while (has_short_branch_candidate && progress) { | 473 while (has_short_branch_candidate && progress) { |
474 progress = false; | 474 progress = false; |
475 has_short_branch_candidate = false; | 475 has_short_branch_candidate = false; |
476 int adjust_block_start = 0; | 476 int adjust_block_start = 0; |
477 for (uint i = 0; i < nblocks; i++) { | 477 for (uint i = 0; i < nblocks; i++) { |
478 Block *b = _cfg->_blocks[i]; | 478 Block* block = _cfg->get_block(i); |
479 int idx = jmp_nidx[i]; | 479 int idx = jmp_nidx[i]; |
480 MachNode* mach = (idx == -1) ? NULL: b->_nodes[idx]->as_Mach(); | 480 MachNode* mach = (idx == -1) ? NULL: block->_nodes[idx]->as_Mach(); |
481 if (mach != NULL && mach->may_be_short_branch()) { | 481 if (mach != NULL && mach->may_be_short_branch()) { |
482 #ifdef ASSERT | 482 #ifdef ASSERT |
483 assert(jmp_size[i] > 0 && mach->is_MachBranch(), "sanity"); | 483 assert(jmp_size[i] > 0 && mach->is_MachBranch(), "sanity"); |
484 int j; | 484 int j; |
485 // Find the branch; ignore trailing NOPs. | 485 // Find the branch; ignore trailing NOPs. |
486 for (j = b->_nodes.size()-1; j>=0; j--) { | 486 for (j = block->_nodes.size()-1; j>=0; j--) { |
487 Node* n = b->_nodes[j]; | 487 Node* n = block->_nodes[j]; |
488 if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) | 488 if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) |
489 break; | 489 break; |
490 } | 490 } |
491 assert(j >= 0 && j == idx && b->_nodes[j] == (Node*)mach, "sanity"); | 491 assert(j >= 0 && j == idx && block->_nodes[j] == (Node*)mach, "sanity"); |
492 #endif | 492 #endif |
493 int br_size = jmp_size[i]; | 493 int br_size = jmp_size[i]; |
494 int br_offs = blk_starts[i] + jmp_offset[i]; | 494 int br_offs = blk_starts[i] + jmp_offset[i]; |
495 | 495 |
496 // This requires the TRUE branch target be in succs[0] | 496 // This requires the TRUE branch target be in succs[0] |
497 uint bnum = b->non_connector_successor(0)->_pre_order; | 497 uint bnum = block->non_connector_successor(0)->_pre_order; |
498 int offset = blk_starts[bnum] - br_offs; | 498 int offset = blk_starts[bnum] - br_offs; |
499 if (bnum > i) { // adjust following block's offset | 499 if (bnum > i) { // adjust following block's offset |
500 offset -= adjust_block_start; | 500 offset -= adjust_block_start; |
501 } | 501 } |
502 // In the following code a nop could be inserted before | 502 // In the following code a nop could be inserted before |
520 if (needs_padding && replacement->avoid_back_to_back()) { | 520 if (needs_padding && replacement->avoid_back_to_back()) { |
521 jmp_offset[i] += nop_size; | 521 jmp_offset[i] += nop_size; |
522 diff -= nop_size; | 522 diff -= nop_size; |
523 } | 523 } |
524 adjust_block_start += diff; | 524 adjust_block_start += diff; |
525 b->_nodes.map(idx, replacement); | 525 block->_nodes.map(idx, replacement); |
526 mach->subsume_by(replacement, C); | 526 mach->subsume_by(replacement, C); |
527 mach = replacement; | 527 mach = replacement; |
528 progress = true; | 528 progress = true; |
529 | 529 |
530 jmp_size[i] = new_size; | 530 jmp_size[i] = new_size; |
1083 assert(_frame_slots >= 0 && _frame_slots < 1000000, "sanity check"); | 1083 assert(_frame_slots >= 0 && _frame_slots < 1000000, "sanity check"); |
1084 | 1084 |
1085 if (has_mach_constant_base_node()) { | 1085 if (has_mach_constant_base_node()) { |
1086 // Fill the constant table. | 1086 // Fill the constant table. |
1087 // Note: This must happen before shorten_branches. | 1087 // Note: This must happen before shorten_branches. |
1088 for (uint i = 0; i < _cfg->_num_blocks; i++) { | 1088 for (uint i = 0; i < _cfg->number_of_blocks(); i++) { |
1089 Block* b = _cfg->_blocks[i]; | 1089 Block* b = _cfg->get_block(i); |
1090 | 1090 |
1091 for (uint j = 0; j < b->_nodes.size(); j++) { | 1091 for (uint j = 0; j < b->_nodes.size(); j++) { |
1092 Node* n = b->_nodes[j]; | 1092 Node* n = b->_nodes[j]; |
1093 | 1093 |
1094 // If the node is a MachConstantNode evaluate the constant | 1094 // If the node is a MachConstantNode evaluate the constant |
1170 _oop_map_set = new OopMapSet(); | 1170 _oop_map_set = new OopMapSet(); |
1171 | 1171 |
1172 // !!!!! This preserves old handling of oopmaps for now | 1172 // !!!!! This preserves old handling of oopmaps for now |
1173 debug_info()->set_oopmaps(_oop_map_set); | 1173 debug_info()->set_oopmaps(_oop_map_set); |
1174 | 1174 |
1175 uint nblocks = _cfg->_num_blocks; | 1175 uint nblocks = _cfg->number_of_blocks(); |
1176 // Count and start of implicit null check instructions | 1176 // Count and start of implicit null check instructions |
1177 uint inct_cnt = 0; | 1177 uint inct_cnt = 0; |
1178 uint *inct_starts = NEW_RESOURCE_ARRAY(uint, nblocks+1); | 1178 uint *inct_starts = NEW_RESOURCE_ARRAY(uint, nblocks+1); |
1179 | 1179 |
1180 // Count and start of calls | 1180 // Count and start of calls |
1218 | 1218 |
1219 // ------------------ | 1219 // ------------------ |
1220 // Now fill in the code buffer | 1220 // Now fill in the code buffer |
1221 Node *delay_slot = NULL; | 1221 Node *delay_slot = NULL; |
1222 | 1222 |
1223 for (uint i=0; i < nblocks; i++) { | 1223 for (uint i = 0; i < nblocks; i++) { |
1224 Block *b = _cfg->_blocks[i]; | 1224 Block* block = _cfg->get_block(i); |
1225 | 1225 Node* head = block->head(); |
1226 Node *head = b->head(); | |
1227 | 1226 |
1228 // If this block needs to start aligned (i.e, can be reached other | 1227 // If this block needs to start aligned (i.e, can be reached other |
1229 // than by falling-thru from the previous block), then force the | 1228 // than by falling-thru from the previous block), then force the |
1230 // start of a new bundle. | 1229 // start of a new bundle. |
1231 if (Pipeline::requires_bundling() && starts_bundle(head)) | 1230 if (Pipeline::requires_bundling() && starts_bundle(head)) { |
1232 cb->flush_bundle(true); | 1231 cb->flush_bundle(true); |
1232 } | |
1233 | 1233 |
1234 #ifdef ASSERT | 1234 #ifdef ASSERT |
1235 if (!b->is_connector()) { | 1235 if (!block->is_connector()) { |
1236 stringStream st; | 1236 stringStream st; |
1237 b->dump_head(&_cfg->_bbs, &st); | 1237 block->dump_head(_cfg, &st); |
1238 MacroAssembler(cb).block_comment(st.as_string()); | 1238 MacroAssembler(cb).block_comment(st.as_string()); |
1239 } | 1239 } |
1240 jmp_target[i] = 0; | 1240 jmp_target[i] = 0; |
1241 jmp_offset[i] = 0; | 1241 jmp_offset[i] = 0; |
1242 jmp_size[i] = 0; | 1242 jmp_size[i] = 0; |
1243 jmp_rule[i] = 0; | 1243 jmp_rule[i] = 0; |
1244 #endif | 1244 #endif |
1245 int blk_offset = current_offset; | 1245 int blk_offset = current_offset; |
1246 | 1246 |
1247 // Define the label at the beginning of the basic block | 1247 // Define the label at the beginning of the basic block |
1248 MacroAssembler(cb).bind(blk_labels[b->_pre_order]); | 1248 MacroAssembler(cb).bind(blk_labels[block->_pre_order]); |
1249 | 1249 |
1250 uint last_inst = b->_nodes.size(); | 1250 uint last_inst = block->_nodes.size(); |
1251 | 1251 |
1252 // Emit block normally, except for last instruction. | 1252 // Emit block normally, except for last instruction. |
1253 // Emit means "dump code bits into code buffer". | 1253 // Emit means "dump code bits into code buffer". |
1254 for (uint j = 0; j<last_inst; j++) { | 1254 for (uint j = 0; j<last_inst; j++) { |
1255 | 1255 |
1256 // Get the node | 1256 // Get the node |
1257 Node* n = b->_nodes[j]; | 1257 Node* n = block->_nodes[j]; |
1258 | 1258 |
1259 // See if delay slots are supported | 1259 // See if delay slots are supported |
1260 if (valid_bundle_info(n) && | 1260 if (valid_bundle_info(n) && |
1261 node_bundling(n)->used_in_unconditional_delay()) { | 1261 node_bundling(n)->used_in_unconditional_delay()) { |
1262 assert(delay_slot == NULL, "no use of delay slot node"); | 1262 assert(delay_slot == NULL, "no use of delay slot node"); |
1306 | 1306 |
1307 if(padding > 0) { | 1307 if(padding > 0) { |
1308 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); | 1308 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); |
1309 int nops_cnt = padding / nop_size; | 1309 int nops_cnt = padding / nop_size; |
1310 MachNode *nop = new (this) MachNopNode(nops_cnt); | 1310 MachNode *nop = new (this) MachNopNode(nops_cnt); |
1311 b->_nodes.insert(j++, nop); | 1311 block->_nodes.insert(j++, nop); |
1312 last_inst++; | 1312 last_inst++; |
1313 _cfg->_bbs.map( nop->_idx, b ); | 1313 _cfg->map_node_to_block(nop, block); |
1314 nop->emit(*cb, _regalloc); | 1314 nop->emit(*cb, _regalloc); |
1315 cb->flush_bundle(true); | 1315 cb->flush_bundle(true); |
1316 current_offset = cb->insts_size(); | 1316 current_offset = cb->insts_size(); |
1317 } | 1317 } |
1318 | 1318 |
1322 | 1322 |
1323 // This destination address is NOT PC-relative | 1323 // This destination address is NOT PC-relative |
1324 mcall->method_set((intptr_t)mcall->entry_point()); | 1324 mcall->method_set((intptr_t)mcall->entry_point()); |
1325 | 1325 |
1326 // Save the return address | 1326 // Save the return address |
1327 call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset(); | 1327 call_returns[block->_pre_order] = current_offset + mcall->ret_addr_offset(); |
1328 | 1328 |
1329 if (mcall->is_MachCallLeaf()) { | 1329 if (mcall->is_MachCallLeaf()) { |
1330 is_mcall = false; | 1330 is_mcall = false; |
1331 is_sfn = false; | 1331 is_sfn = false; |
1332 } | 1332 } |
1359 } | 1359 } |
1360 | 1360 |
1361 // If this is a branch, then fill in the label with the target BB's label | 1361 // If this is a branch, then fill in the label with the target BB's label |
1362 else if (mach->is_MachBranch()) { | 1362 else if (mach->is_MachBranch()) { |
1363 // This requires the TRUE branch target be in succs[0] | 1363 // This requires the TRUE branch target be in succs[0] |
1364 uint block_num = b->non_connector_successor(0)->_pre_order; | 1364 uint block_num = block->non_connector_successor(0)->_pre_order; |
1365 | 1365 |
1366 // Try to replace long branch if delay slot is not used, | 1366 // Try to replace long branch if delay slot is not used, |
1367 // it is mostly for back branches since forward branch's | 1367 // it is mostly for back branches since forward branch's |
1368 // distance is not updated yet. | 1368 // distance is not updated yet. |
1369 bool delay_slot_is_used = valid_bundle_info(n) && | 1369 bool delay_slot_is_used = valid_bundle_info(n) && |
1392 int new_size = replacement->size(_regalloc); | 1392 int new_size = replacement->size(_regalloc); |
1393 assert((br_size - new_size) >= (int)nop_size, "short_branch size should be smaller"); | 1393 assert((br_size - new_size) >= (int)nop_size, "short_branch size should be smaller"); |
1394 // Insert padding between avoid_back_to_back branches. | 1394 // Insert padding between avoid_back_to_back branches. |
1395 if (needs_padding && replacement->avoid_back_to_back()) { | 1395 if (needs_padding && replacement->avoid_back_to_back()) { |
1396 MachNode *nop = new (this) MachNopNode(); | 1396 MachNode *nop = new (this) MachNopNode(); |
1397 b->_nodes.insert(j++, nop); | 1397 block->_nodes.insert(j++, nop); |
1398 _cfg->_bbs.map(nop->_idx, b); | 1398 _cfg->map_node_to_block(nop, block); |
1399 last_inst++; | 1399 last_inst++; |
1400 nop->emit(*cb, _regalloc); | 1400 nop->emit(*cb, _regalloc); |
1401 cb->flush_bundle(true); | 1401 cb->flush_bundle(true); |
1402 current_offset = cb->insts_size(); | 1402 current_offset = cb->insts_size(); |
1403 } | 1403 } |
1405 jmp_target[i] = block_num; | 1405 jmp_target[i] = block_num; |
1406 jmp_offset[i] = current_offset - blk_offset; | 1406 jmp_offset[i] = current_offset - blk_offset; |
1407 jmp_size[i] = new_size; | 1407 jmp_size[i] = new_size; |
1408 jmp_rule[i] = mach->rule(); | 1408 jmp_rule[i] = mach->rule(); |
1409 #endif | 1409 #endif |
1410 b->_nodes.map(j, replacement); | 1410 block->_nodes.map(j, replacement); |
1411 mach->subsume_by(replacement, C); | 1411 mach->subsume_by(replacement, C); |
1412 n = replacement; | 1412 n = replacement; |
1413 mach = replacement; | 1413 mach = replacement; |
1414 } | 1414 } |
1415 } | 1415 } |
1416 mach->as_MachBranch()->label_set( &blk_labels[block_num], block_num ); | 1416 mach->as_MachBranch()->label_set( &blk_labels[block_num], block_num ); |
1417 } else if (mach->ideal_Opcode() == Op_Jump) { | 1417 } else if (mach->ideal_Opcode() == Op_Jump) { |
1418 for (uint h = 0; h < b->_num_succs; h++) { | 1418 for (uint h = 0; h < block->_num_succs; h++) { |
1419 Block* succs_block = b->_succs[h]; | 1419 Block* succs_block = block->_succs[h]; |
1420 for (uint j = 1; j < succs_block->num_preds(); j++) { | 1420 for (uint j = 1; j < succs_block->num_preds(); j++) { |
1421 Node* jpn = succs_block->pred(j); | 1421 Node* jpn = succs_block->pred(j); |
1422 if (jpn->is_JumpProj() && jpn->in(0) == mach) { | 1422 if (jpn->is_JumpProj() && jpn->in(0) == mach) { |
1423 uint block_num = succs_block->non_connector()->_pre_order; | 1423 uint block_num = succs_block->non_connector()->_pre_order; |
1424 Label *blkLabel = &blk_labels[block_num]; | 1424 Label *blkLabel = &blk_labels[block_num]; |
1425 mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel); | 1425 mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel); |
1426 } | 1426 } |
1427 } | 1427 } |
1428 } | 1428 } |
1429 } | 1429 } |
1430 | |
1431 #ifdef ASSERT | 1430 #ifdef ASSERT |
1432 // Check that oop-store precedes the card-mark | 1431 // Check that oop-store precedes the card-mark |
1433 else if (mach->ideal_Opcode() == Op_StoreCM) { | 1432 else if (mach->ideal_Opcode() == Op_StoreCM) { |
1434 uint storeCM_idx = j; | 1433 uint storeCM_idx = j; |
1435 int count = 0; | 1434 int count = 0; |
1436 for (uint prec = mach->req(); prec < mach->len(); prec++) { | 1435 for (uint prec = mach->req(); prec < mach->len(); prec++) { |
1437 Node *oop_store = mach->in(prec); // Precedence edge | 1436 Node *oop_store = mach->in(prec); // Precedence edge |
1438 if (oop_store == NULL) continue; | 1437 if (oop_store == NULL) continue; |
1439 count++; | 1438 count++; |
1440 uint i4; | 1439 uint i4; |
1441 for( i4 = 0; i4 < last_inst; ++i4 ) { | 1440 for (i4 = 0; i4 < last_inst; ++i4) { |
1442 if( b->_nodes[i4] == oop_store ) break; | 1441 if (block->_nodes[i4] == oop_store) { |
1442 break; | |
1443 } | |
1443 } | 1444 } |
1444 // Note: This test can provide a false failure if other precedence | 1445 // Note: This test can provide a false failure if other precedence |
1445 // edges have been added to the storeCMNode. | 1446 // edges have been added to the storeCMNode. |
1446 assert( i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store"); | 1447 assert(i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store"); |
1447 } | 1448 } |
1448 assert(count > 0, "storeCM expects at least one precedence edge"); | 1449 assert(count > 0, "storeCM expects at least one precedence edge"); |
1449 } | 1450 } |
1450 #endif | 1451 #endif |
1451 | |
1452 else if (!n->is_Proj()) { | 1452 else if (!n->is_Proj()) { |
1453 // Remember the beginning of the previous instruction, in case | 1453 // Remember the beginning of the previous instruction, in case |
1454 // it's followed by a flag-kill and a null-check. Happens on | 1454 // it's followed by a flag-kill and a null-check. Happens on |
1455 // Intel all the time, with add-to-memory kind of opcodes. | 1455 // Intel all the time, with add-to-memory kind of opcodes. |
1456 previous_offset = current_offset; | 1456 previous_offset = current_offset; |
1542 } // End for all instructions in block | 1542 } // End for all instructions in block |
1543 | 1543 |
1544 // If the next block is the top of a loop, pad this block out to align | 1544 // If the next block is the top of a loop, pad this block out to align |
1545 // the loop top a little. Helps prevent pipe stalls at loop back branches. | 1545 // the loop top a little. Helps prevent pipe stalls at loop back branches. |
1546 if (i < nblocks-1) { | 1546 if (i < nblocks-1) { |
1547 Block *nb = _cfg->_blocks[i+1]; | 1547 Block *nb = _cfg->get_block(i + 1); |
1548 int padding = nb->alignment_padding(current_offset); | 1548 int padding = nb->alignment_padding(current_offset); |
1549 if( padding > 0 ) { | 1549 if( padding > 0 ) { |
1550 MachNode *nop = new (this) MachNopNode(padding / nop_size); | 1550 MachNode *nop = new (this) MachNopNode(padding / nop_size); |
1551 b->_nodes.insert( b->_nodes.size(), nop ); | 1551 block->_nodes.insert(block->_nodes.size(), nop); |
1552 _cfg->_bbs.map( nop->_idx, b ); | 1552 _cfg->map_node_to_block(nop, block); |
1553 nop->emit(*cb, _regalloc); | 1553 nop->emit(*cb, _regalloc); |
1554 current_offset = cb->insts_size(); | 1554 current_offset = cb->insts_size(); |
1555 } | 1555 } |
1556 } | 1556 } |
1557 // Verify that the distance for generated before forward | 1557 // Verify that the distance for generated before forward |
1586 assert(false, "Displacement too large for short jmp"); | 1586 assert(false, "Displacement too large for short jmp"); |
1587 } | 1587 } |
1588 } | 1588 } |
1589 } | 1589 } |
1590 #endif | 1590 #endif |
1591 | |
1592 // ------------------ | |
1593 | 1591 |
1594 #ifndef PRODUCT | 1592 #ifndef PRODUCT |
1595 // Information on the size of the method, without the extraneous code | 1593 // Information on the size of the method, without the extraneous code |
1596 Scheduling::increment_method_size(cb->insts_size()); | 1594 Scheduling::increment_method_size(cb->insts_size()); |
1597 #endif | 1595 #endif |
1649 | 1647 |
1650 void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_starts, Label *blk_labels) { | 1648 void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_starts, Label *blk_labels) { |
1651 _inc_table.set_size(cnt); | 1649 _inc_table.set_size(cnt); |
1652 | 1650 |
1653 uint inct_cnt = 0; | 1651 uint inct_cnt = 0; |
1654 for( uint i=0; i<_cfg->_num_blocks; i++ ) { | 1652 for (uint i = 0; i < _cfg->number_of_blocks(); i++) { |
1655 Block *b = _cfg->_blocks[i]; | 1653 Block* block = _cfg->get_block(i); |
1656 Node *n = NULL; | 1654 Node *n = NULL; |
1657 int j; | 1655 int j; |
1658 | 1656 |
1659 // Find the branch; ignore trailing NOPs. | 1657 // Find the branch; ignore trailing NOPs. |
1660 for( j = b->_nodes.size()-1; j>=0; j-- ) { | 1658 for (j = block->_nodes.size() - 1; j >= 0; j--) { |
1661 n = b->_nodes[j]; | 1659 n = block->_nodes[j]; |
1662 if( !n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con ) | 1660 if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) { |
1663 break; | 1661 break; |
1662 } | |
1664 } | 1663 } |
1665 | 1664 |
1666 // If we didn't find anything, continue | 1665 // If we didn't find anything, continue |
1667 if( j < 0 ) continue; | 1666 if (j < 0) { |
1667 continue; | |
1668 } | |
1668 | 1669 |
1669 // Compute ExceptionHandlerTable subtable entry and add it | 1670 // Compute ExceptionHandlerTable subtable entry and add it |
1670 // (skip empty blocks) | 1671 // (skip empty blocks) |
1671 if( n->is_Catch() ) { | 1672 if (n->is_Catch()) { |
1672 | 1673 |
1673 // Get the offset of the return from the call | 1674 // Get the offset of the return from the call |
1674 uint call_return = call_returns[b->_pre_order]; | 1675 uint call_return = call_returns[block->_pre_order]; |
1675 #ifdef ASSERT | 1676 #ifdef ASSERT |
1676 assert( call_return > 0, "no call seen for this basic block" ); | 1677 assert( call_return > 0, "no call seen for this basic block" ); |
1677 while( b->_nodes[--j]->is_MachProj() ) ; | 1678 while (block->_nodes[--j]->is_MachProj()) ; |
1678 assert( b->_nodes[j]->is_MachCall(), "CatchProj must follow call" ); | 1679 assert(block->_nodes[j]->is_MachCall(), "CatchProj must follow call"); |
1679 #endif | 1680 #endif |
1680 // last instruction is a CatchNode, find it's CatchProjNodes | 1681 // last instruction is a CatchNode, find it's CatchProjNodes |
1681 int nof_succs = b->_num_succs; | 1682 int nof_succs = block->_num_succs; |
1682 // allocate space | 1683 // allocate space |
1683 GrowableArray<intptr_t> handler_bcis(nof_succs); | 1684 GrowableArray<intptr_t> handler_bcis(nof_succs); |
1684 GrowableArray<intptr_t> handler_pcos(nof_succs); | 1685 GrowableArray<intptr_t> handler_pcos(nof_succs); |
1685 // iterate through all successors | 1686 // iterate through all successors |
1686 for (int j = 0; j < nof_succs; j++) { | 1687 for (int j = 0; j < nof_succs; j++) { |
1687 Block* s = b->_succs[j]; | 1688 Block* s = block->_succs[j]; |
1688 bool found_p = false; | 1689 bool found_p = false; |
1689 for( uint k = 1; k < s->num_preds(); k++ ) { | 1690 for (uint k = 1; k < s->num_preds(); k++) { |
1690 Node *pk = s->pred(k); | 1691 Node* pk = s->pred(k); |
1691 if( pk->is_CatchProj() && pk->in(0) == n ) { | 1692 if (pk->is_CatchProj() && pk->in(0) == n) { |
1692 const CatchProjNode* p = pk->as_CatchProj(); | 1693 const CatchProjNode* p = pk->as_CatchProj(); |
1693 found_p = true; | 1694 found_p = true; |
1694 // add the corresponding handler bci & pco information | 1695 // add the corresponding handler bci & pco information |
1695 if( p->_con != CatchProjNode::fall_through_index ) { | 1696 if (p->_con != CatchProjNode::fall_through_index) { |
1696 // p leads to an exception handler (and is not fall through) | 1697 // p leads to an exception handler (and is not fall through) |
1697 assert(s == _cfg->_blocks[s->_pre_order],"bad numbering"); | 1698 assert(s == _cfg->get_block(s->_pre_order), "bad numbering"); |
1698 // no duplicates, please | 1699 // no duplicates, please |
1699 if( !handler_bcis.contains(p->handler_bci()) ) { | 1700 if (!handler_bcis.contains(p->handler_bci())) { |
1700 uint block_num = s->non_connector()->_pre_order; | 1701 uint block_num = s->non_connector()->_pre_order; |
1701 handler_bcis.append(p->handler_bci()); | 1702 handler_bcis.append(p->handler_bci()); |
1702 handler_pcos.append(blk_labels[block_num].loc_pos()); | 1703 handler_pcos.append(blk_labels[block_num].loc_pos()); |
1703 } | 1704 } |
1704 } | 1705 } |
1713 _handler_table.add_subtable(call_return, &handler_bcis, NULL, &handler_pcos); | 1714 _handler_table.add_subtable(call_return, &handler_bcis, NULL, &handler_pcos); |
1714 continue; | 1715 continue; |
1715 } | 1716 } |
1716 | 1717 |
1717 // Handle implicit null exception table updates | 1718 // Handle implicit null exception table updates |
1718 if( n->is_MachNullCheck() ) { | 1719 if (n->is_MachNullCheck()) { |
1719 uint block_num = b->non_connector_successor(0)->_pre_order; | 1720 uint block_num = block->non_connector_successor(0)->_pre_order; |
1720 _inc_table.append( inct_starts[inct_cnt++], blk_labels[block_num].loc_pos() ); | 1721 _inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos()); |
1721 continue; | 1722 continue; |
1722 } | 1723 } |
1723 } // End of for all blocks fill in exception table entries | 1724 } // End of for all blocks fill in exception table entries |
1724 } | 1725 } |
1725 | 1726 |
1735 // Initializer for class Scheduling | 1736 // Initializer for class Scheduling |
1736 | 1737 |
1737 Scheduling::Scheduling(Arena *arena, Compile &compile) | 1738 Scheduling::Scheduling(Arena *arena, Compile &compile) |
1738 : _arena(arena), | 1739 : _arena(arena), |
1739 _cfg(compile.cfg()), | 1740 _cfg(compile.cfg()), |
1740 _bbs(compile.cfg()->_bbs), | |
1741 _regalloc(compile.regalloc()), | 1741 _regalloc(compile.regalloc()), |
1742 _reg_node(arena), | 1742 _reg_node(arena), |
1743 _bundle_instr_count(0), | 1743 _bundle_instr_count(0), |
1744 _bundle_cycle_number(0), | 1744 _bundle_cycle_number(0), |
1745 _scheduled(arena), | 1745 _scheduled(arena), |
1775 memset(_node_latency, 0, node_max * sizeof(unsigned short)); | 1775 memset(_node_latency, 0, node_max * sizeof(unsigned short)); |
1776 memset(_uses, 0, node_max * sizeof(short)); | 1776 memset(_uses, 0, node_max * sizeof(short)); |
1777 memset(_current_latency, 0, node_max * sizeof(unsigned short)); | 1777 memset(_current_latency, 0, node_max * sizeof(unsigned short)); |
1778 | 1778 |
1779 // Clear the bundling information | 1779 // Clear the bundling information |
1780 memcpy(_bundle_use_elements, | 1780 memcpy(_bundle_use_elements, Pipeline_Use::elaborated_elements, sizeof(Pipeline_Use::elaborated_elements)); |
1781 Pipeline_Use::elaborated_elements, | |
1782 sizeof(Pipeline_Use::elaborated_elements)); | |
1783 | 1781 |
1784 // Get the last node | 1782 // Get the last node |
1785 Block *bb = _cfg->_blocks[_cfg->_blocks.size()-1]; | 1783 Block* block = _cfg->get_block(_cfg->number_of_blocks() - 1); |
1786 | 1784 |
1787 _next_node = bb->_nodes[bb->_nodes.size()-1]; | 1785 _next_node = block->_nodes[block->_nodes.size() - 1]; |
1788 } | 1786 } |
1789 | 1787 |
1790 #ifndef PRODUCT | 1788 #ifndef PRODUCT |
1791 // Scheduling destructor | 1789 // Scheduling destructor |
1792 Scheduling::~Scheduling() { | 1790 Scheduling::~Scheduling() { |
1832 memcpy(_bundle_use_elements, | 1830 memcpy(_bundle_use_elements, |
1833 Pipeline_Use::elaborated_elements, | 1831 Pipeline_Use::elaborated_elements, |
1834 sizeof(Pipeline_Use::elaborated_elements)); | 1832 sizeof(Pipeline_Use::elaborated_elements)); |
1835 } | 1833 } |
1836 | 1834 |
1837 //------------------------------ScheduleAndBundle------------------------------ | |
1838 // Perform instruction scheduling and bundling over the sequence of | 1835 // Perform instruction scheduling and bundling over the sequence of |
1839 // instructions in backwards order. | 1836 // instructions in backwards order. |
1840 void Compile::ScheduleAndBundle() { | 1837 void Compile::ScheduleAndBundle() { |
1841 | 1838 |
1842 // Don't optimize this if it isn't a method | 1839 // Don't optimize this if it isn't a method |
1859 // Walk backwards over each basic block, computing the needed alignment | 1856 // Walk backwards over each basic block, computing the needed alignment |
1860 // Walk over all the basic blocks | 1857 // Walk over all the basic blocks |
1861 scheduling.DoScheduling(); | 1858 scheduling.DoScheduling(); |
1862 } | 1859 } |
1863 | 1860 |
1864 //------------------------------ComputeLocalLatenciesForward------------------- | |
1865 // Compute the latency of all the instructions. This is fairly simple, | 1861 // Compute the latency of all the instructions. This is fairly simple, |
1866 // because we already have a legal ordering. Walk over the instructions | 1862 // because we already have a legal ordering. Walk over the instructions |
1867 // from first to last, and compute the latency of the instruction based | 1863 // from first to last, and compute the latency of the instruction based |
1868 // on the latency of the preceding instruction(s). | 1864 // on the latency of the preceding instruction(s). |
1869 void Scheduling::ComputeLocalLatenciesForward(const Block *bb) { | 1865 void Scheduling::ComputeLocalLatenciesForward(const Block *bb) { |
2029 #endif | 2025 #endif |
2030 | 2026 |
2031 return _available[0]; | 2027 return _available[0]; |
2032 } | 2028 } |
2033 | 2029 |
2034 //------------------------------AddNodeToAvailableList------------------------- | |
2035 void Scheduling::AddNodeToAvailableList(Node *n) { | 2030 void Scheduling::AddNodeToAvailableList(Node *n) { |
2036 assert( !n->is_Proj(), "projections never directly made available" ); | 2031 assert( !n->is_Proj(), "projections never directly made available" ); |
2037 #ifndef PRODUCT | 2032 #ifndef PRODUCT |
2038 if (_cfg->C->trace_opto_output()) { | 2033 if (_cfg->C->trace_opto_output()) { |
2039 tty->print("# AddNodeToAvailableList: "); | 2034 tty->print("# AddNodeToAvailableList: "); |
2075 if (_cfg->C->trace_opto_output()) | 2070 if (_cfg->C->trace_opto_output()) |
2076 dump_available(); | 2071 dump_available(); |
2077 #endif | 2072 #endif |
2078 } | 2073 } |
2079 | 2074 |
2080 //------------------------------DecrementUseCounts----------------------------- | |
2081 void Scheduling::DecrementUseCounts(Node *n, const Block *bb) { | 2075 void Scheduling::DecrementUseCounts(Node *n, const Block *bb) { |
2082 for ( uint i=0; i < n->len(); i++ ) { | 2076 for ( uint i=0; i < n->len(); i++ ) { |
2083 Node *def = n->in(i); | 2077 Node *def = n->in(i); |
2084 if (!def) continue; | 2078 if (!def) continue; |
2085 if( def->is_Proj() ) // If this is a machine projection, then | 2079 if( def->is_Proj() ) // If this is a machine projection, then |
2086 def = def->in(0); // propagate usage thru to the base instruction | 2080 def = def->in(0); // propagate usage thru to the base instruction |
2087 | 2081 |
2088 if( _bbs[def->_idx] != bb ) // Ignore if not block-local | 2082 if(_cfg->get_block_for_node(def) != bb) { // Ignore if not block-local |
2089 continue; | 2083 continue; |
2084 } | |
2090 | 2085 |
2091 // Compute the latency | 2086 // Compute the latency |
2092 uint l = _bundle_cycle_number + n->latency(i); | 2087 uint l = _bundle_cycle_number + n->latency(i); |
2093 if (_current_latency[def->_idx] < l) | 2088 if (_current_latency[def->_idx] < l) |
2094 _current_latency[def->_idx] = l; | 2089 _current_latency[def->_idx] = l; |
2097 if ((--_uses[def->_idx]) == 0) | 2092 if ((--_uses[def->_idx]) == 0) |
2098 AddNodeToAvailableList(def); | 2093 AddNodeToAvailableList(def); |
2099 } | 2094 } |
2100 } | 2095 } |
2101 | 2096 |
2102 //------------------------------AddNodeToBundle-------------------------------- | |
2103 void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { | 2097 void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { |
2104 #ifndef PRODUCT | 2098 #ifndef PRODUCT |
2105 if (_cfg->C->trace_opto_output()) { | 2099 if (_cfg->C->trace_opto_output()) { |
2106 tty->print("# AddNodeToBundle: "); | 2100 tty->print("# AddNodeToBundle: "); |
2107 n->dump(); | 2101 n->dump(); |
2312 // Walk all the definitions, decrementing use counts, and | 2306 // Walk all the definitions, decrementing use counts, and |
2313 // if a definition has a 0 use count, place it in the available list. | 2307 // if a definition has a 0 use count, place it in the available list. |
2314 DecrementUseCounts(n,bb); | 2308 DecrementUseCounts(n,bb); |
2315 } | 2309 } |
2316 | 2310 |
2317 //------------------------------ComputeUseCount-------------------------------- | |
2318 // This method sets the use count within a basic block. We will ignore all | 2311 // This method sets the use count within a basic block. We will ignore all |
2319 // uses outside the current basic block. As we are doing a backwards walk, | 2312 // uses outside the current basic block. As we are doing a backwards walk, |
2320 // any node we reach that has a use count of 0 may be scheduled. This also | 2313 // any node we reach that has a use count of 0 may be scheduled. This also |
2321 // avoids the problem of cyclic references from phi nodes, as long as phi | 2314 // avoids the problem of cyclic references from phi nodes, as long as phi |
2322 // nodes are at the front of the basic block. This method also initializes | 2315 // nodes are at the front of the basic block. This method also initializes |
2356 // Account for all uses | 2349 // Account for all uses |
2357 for ( uint k = 0; k < n->len(); k++ ) { | 2350 for ( uint k = 0; k < n->len(); k++ ) { |
2358 Node *inp = n->in(k); | 2351 Node *inp = n->in(k); |
2359 if (!inp) continue; | 2352 if (!inp) continue; |
2360 assert(inp != n, "no cycles allowed" ); | 2353 assert(inp != n, "no cycles allowed" ); |
2361 if( _bbs[inp->_idx] == bb ) { // Block-local use? | 2354 if (_cfg->get_block_for_node(inp) == bb) { // Block-local use? |
2362 if( inp->is_Proj() ) // Skip through Proj's | 2355 if (inp->is_Proj()) { // Skip through Proj's |
2363 inp = inp->in(0); | 2356 inp = inp->in(0); |
2357 } | |
2364 ++_uses[inp->_idx]; // Count 1 block-local use | 2358 ++_uses[inp->_idx]; // Count 1 block-local use |
2365 } | 2359 } |
2366 } | 2360 } |
2367 | 2361 |
2368 // If this instruction has a 0 use count, then it is available | 2362 // If this instruction has a 0 use count, then it is available |
2396 | 2390 |
2397 Block *succ_bb = NULL; | 2391 Block *succ_bb = NULL; |
2398 Block *bb; | 2392 Block *bb; |
2399 | 2393 |
2400 // Walk over all the basic blocks in reverse order | 2394 // Walk over all the basic blocks in reverse order |
2401 for( int i=_cfg->_num_blocks-1; i >= 0; succ_bb = bb, i-- ) { | 2395 for (int i = _cfg->number_of_blocks() - 1; i >= 0; succ_bb = bb, i--) { |
2402 bb = _cfg->_blocks[i]; | 2396 bb = _cfg->get_block(i); |
2403 | 2397 |
2404 #ifndef PRODUCT | 2398 #ifndef PRODUCT |
2405 if (_cfg->C->trace_opto_output()) { | 2399 if (_cfg->C->trace_opto_output()) { |
2406 tty->print("# Schedule BB#%03d (initial)\n", i); | 2400 tty->print("# Schedule BB#%03d (initial)\n", i); |
2407 for (uint j = 0; j < bb->_nodes.size(); j++) | 2401 for (uint j = 0; j < bb->_nodes.size(); j++) { |
2408 bb->_nodes[j]->dump(); | 2402 bb->_nodes[j]->dump(); |
2403 } | |
2409 } | 2404 } |
2410 #endif | 2405 #endif |
2411 | 2406 |
2412 // On the head node, skip processing | 2407 // On the head node, skip processing |
2413 if( bb == _cfg->_broot ) | 2408 if (bb == _cfg->get_root_block()) { |
2414 continue; | 2409 continue; |
2410 } | |
2415 | 2411 |
2416 // Skip empty, connector blocks | 2412 // Skip empty, connector blocks |
2417 if (bb->is_connector()) | 2413 if (bb->is_connector()) |
2418 continue; | 2414 continue; |
2419 | 2415 |
2546 // Record final node-bundling array location | 2542 // Record final node-bundling array location |
2547 _regalloc->C->set_node_bundling_base(_node_bundling_base); | 2543 _regalloc->C->set_node_bundling_base(_node_bundling_base); |
2548 | 2544 |
2549 } // end DoScheduling | 2545 } // end DoScheduling |
2550 | 2546 |
2551 //------------------------------verify_good_schedule--------------------------- | |
2552 // Verify that no live-range used in the block is killed in the block by a | 2547 // Verify that no live-range used in the block is killed in the block by a |
2553 // wrong DEF. This doesn't verify live-ranges that span blocks. | 2548 // wrong DEF. This doesn't verify live-ranges that span blocks. |
2554 | 2549 |
2555 // Check for edge existence. Used to avoid adding redundant precedence edges. | 2550 // Check for edge existence. Used to avoid adding redundant precedence edges. |
2556 static bool edge_from_to( Node *from, Node *to ) { | 2551 static bool edge_from_to( Node *from, Node *to ) { |
2559 return true; | 2554 return true; |
2560 return false; | 2555 return false; |
2561 } | 2556 } |
2562 | 2557 |
2563 #ifdef ASSERT | 2558 #ifdef ASSERT |
2564 //------------------------------verify_do_def---------------------------------- | |
2565 void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) { | 2559 void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) { |
2566 // Check for bad kills | 2560 // Check for bad kills |
2567 if( OptoReg::is_valid(def) ) { // Ignore stores & control flow | 2561 if( OptoReg::is_valid(def) ) { // Ignore stores & control flow |
2568 Node *prior_use = _reg_node[def]; | 2562 Node *prior_use = _reg_node[def]; |
2569 if( prior_use && !edge_from_to(prior_use,n) ) { | 2563 if( prior_use && !edge_from_to(prior_use,n) ) { |
2575 } | 2569 } |
2576 _reg_node.map(def,NULL); // Kill live USEs | 2570 _reg_node.map(def,NULL); // Kill live USEs |
2577 } | 2571 } |
2578 } | 2572 } |
2579 | 2573 |
2580 //------------------------------verify_good_schedule--------------------------- | |
2581 void Scheduling::verify_good_schedule( Block *b, const char *msg ) { | 2574 void Scheduling::verify_good_schedule( Block *b, const char *msg ) { |
2582 | 2575 |
2583 // Zap to something reasonable for the verify code | 2576 // Zap to something reasonable for the verify code |
2584 _reg_node.clear(); | 2577 _reg_node.clear(); |
2585 | 2578 |
2635 if( from != to && // No cycles (for things like LD L0,[L0+4] ) | 2628 if( from != to && // No cycles (for things like LD L0,[L0+4] ) |
2636 !edge_from_to( from, to ) ) // Avoid duplicate edge | 2629 !edge_from_to( from, to ) ) // Avoid duplicate edge |
2637 from->add_prec(to); | 2630 from->add_prec(to); |
2638 } | 2631 } |
2639 | 2632 |
2640 //------------------------------anti_do_def------------------------------------ | |
2641 void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) { | 2633 void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) { |
2642 if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow | 2634 if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow |
2643 return; | 2635 return; |
2644 | 2636 |
2645 Node *pinch = _reg_node[def_reg]; // Get pinch point | 2637 Node *pinch = _reg_node[def_reg]; // Get pinch point |
2646 if( !pinch || _bbs[pinch->_idx] != b || // No pinch-point yet? | 2638 if ((pinch == NULL) || _cfg->get_block_for_node(pinch) != b || // No pinch-point yet? |
2647 is_def ) { // Check for a true def (not a kill) | 2639 is_def ) { // Check for a true def (not a kill) |
2648 _reg_node.map(def_reg,def); // Record def/kill as the optimistic pinch-point | 2640 _reg_node.map(def_reg,def); // Record def/kill as the optimistic pinch-point |
2649 return; | 2641 return; |
2650 } | 2642 } |
2651 | 2643 |
2667 } | 2659 } |
2668 if (pinch->_idx >= _regalloc->node_regs_max_index()) { | 2660 if (pinch->_idx >= _regalloc->node_regs_max_index()) { |
2669 _cfg->C->record_method_not_compilable("too many D-U pinch points"); | 2661 _cfg->C->record_method_not_compilable("too many D-U pinch points"); |
2670 return; | 2662 return; |
2671 } | 2663 } |
2672 _bbs.map(pinch->_idx,b); // Pretend it's valid in this block (lazy init) | 2664 _cfg->map_node_to_block(pinch, b); // Pretend it's valid in this block (lazy init) |
2673 _reg_node.map(def_reg,pinch); // Record pinch-point | 2665 _reg_node.map(def_reg,pinch); // Record pinch-point |
2674 //_regalloc->set_bad(pinch->_idx); // Already initialized this way. | 2666 //_regalloc->set_bad(pinch->_idx); // Already initialized this way. |
2675 if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill | 2667 if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill |
2676 pinch->init_req(0, _cfg->C->top()); // set not NULL for the next call | 2668 pinch->init_req(0, _cfg->C->top()); // set not NULL for the next call |
2677 add_prec_edge_from_to(later_def,pinch); // Add edge from kill to pinch | 2669 add_prec_edge_from_to(later_def,pinch); // Add edge from kill to pinch |
2705 | 2697 |
2706 // Add edge from kill to pinch-point | 2698 // Add edge from kill to pinch-point |
2707 add_prec_edge_from_to(kill,pinch); | 2699 add_prec_edge_from_to(kill,pinch); |
2708 } | 2700 } |
2709 | 2701 |
2710 //------------------------------anti_do_use------------------------------------ | |
2711 void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) { | 2702 void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) { |
2712 if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow | 2703 if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow |
2713 return; | 2704 return; |
2714 Node *pinch = _reg_node[use_reg]; // Get pinch point | 2705 Node *pinch = _reg_node[use_reg]; // Get pinch point |
2715 // Check for no later def_reg/kill in block | 2706 // Check for no later def_reg/kill in block |
2716 if( pinch && _bbs[pinch->_idx] == b && | 2707 if ((pinch != NULL) && _cfg->get_block_for_node(pinch) == b && |
2717 // Use has to be block-local as well | 2708 // Use has to be block-local as well |
2718 _bbs[use->_idx] == b ) { | 2709 _cfg->get_block_for_node(use) == b) { |
2719 if( pinch->Opcode() == Op_Node && // Real pinch-point (not optimistic?) | 2710 if( pinch->Opcode() == Op_Node && // Real pinch-point (not optimistic?) |
2720 pinch->req() == 1 ) { // pinch not yet in block? | 2711 pinch->req() == 1 ) { // pinch not yet in block? |
2721 pinch->del_req(0); // yank pointer to later-def, also set flag | 2712 pinch->del_req(0); // yank pointer to later-def, also set flag |
2722 // Insert the pinch-point in the block just after the last use | 2713 // Insert the pinch-point in the block just after the last use |
2723 b->_nodes.insert(b->find_node(use)+1,pinch); | 2714 b->_nodes.insert(b->find_node(use)+1,pinch); |
2726 | 2717 |
2727 add_prec_edge_from_to(pinch,use); | 2718 add_prec_edge_from_to(pinch,use); |
2728 } | 2719 } |
2729 } | 2720 } |
2730 | 2721 |
2731 //------------------------------ComputeRegisterAntidependences----------------- | |
2732 // We insert antidependences between the reads and following write of | 2722 // We insert antidependences between the reads and following write of |
2733 // allocated registers to prevent illegal code motion. Hopefully, the | 2723 // allocated registers to prevent illegal code motion. Hopefully, the |
2734 // number of added references should be fairly small, especially as we | 2724 // number of added references should be fairly small, especially as we |
2735 // are only adding references within the current basic block. | 2725 // are only adding references within the current basic block. |
2736 void Scheduling::ComputeRegisterAntidependencies(Block *b) { | 2726 void Scheduling::ComputeRegisterAntidependencies(Block *b) { |
2859 // Garbage collect pinch nodes that were not consumed. | 2849 // Garbage collect pinch nodes that were not consumed. |
2860 // They are usually created by a fat kill MachProj for a call. | 2850 // They are usually created by a fat kill MachProj for a call. |
2861 garbage_collect_pinch_nodes(); | 2851 garbage_collect_pinch_nodes(); |
2862 } | 2852 } |
2863 } | 2853 } |
2864 | |
2865 //------------------------------garbage_collect_pinch_nodes------------------------------- | |
2866 | 2854 |
2867 // Garbage collect pinch nodes for reuse by other blocks. | 2855 // Garbage collect pinch nodes for reuse by other blocks. |
2868 // | 2856 // |
2869 // The block scheduler's insertion of anti-dependence | 2857 // The block scheduler's insertion of anti-dependence |
2870 // edges creates many pinch nodes when the block contains | 2858 // edges creates many pinch nodes when the block contains |
2893 if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:"); | 2881 if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:"); |
2894 #endif | 2882 #endif |
2895 int trace_cnt = 0; | 2883 int trace_cnt = 0; |
2896 for (uint k = 0; k < _reg_node.Size(); k++) { | 2884 for (uint k = 0; k < _reg_node.Size(); k++) { |
2897 Node* pinch = _reg_node[k]; | 2885 Node* pinch = _reg_node[k]; |
2898 if (pinch != NULL && pinch->Opcode() == Op_Node && | 2886 if ((pinch != NULL) && pinch->Opcode() == Op_Node && |
2899 // no predecence input edges | 2887 // no predecence input edges |
2900 (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) { | 2888 (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) { |
2901 cleanup_pinch(pinch); | 2889 cleanup_pinch(pinch); |
2902 _pinch_free_list.push(pinch); | 2890 _pinch_free_list.push(pinch); |
2903 _reg_node.map(k, NULL); | 2891 _reg_node.map(k, NULL); |
2936 } | 2924 } |
2937 // May have a later_def entry | 2925 // May have a later_def entry |
2938 pinch->set_req(0, NULL); | 2926 pinch->set_req(0, NULL); |
2939 } | 2927 } |
2940 | 2928 |
2941 //------------------------------print_statistics------------------------------- | |
2942 #ifndef PRODUCT | 2929 #ifndef PRODUCT |
2943 | 2930 |
2944 void Scheduling::dump_available() const { | 2931 void Scheduling::dump_available() const { |
2945 tty->print("#Availist "); | 2932 tty->print("#Availist "); |
2946 for (uint i = 0; i < _available.size(); i++) | 2933 for (uint i = 0; i < _available.size(); i++) |