comparison src/share/vm/opto/output.cpp @ 3851:95134e034042
7063629: use cbcond in C2 generated code on T4
Summary: Use new short branch instruction in C2 generated code.
Reviewed-by: never
author | kvn |
---|---|
date | Thu, 11 Aug 2011 12:08:11 -0700 |
parents | c7b60b601eb4 |
children | 11211f7cb5a0 |
3850:6987871cfb9b | 3851:95134e034042 |
---|---|
126 | 126 |
127 # ifdef ENABLE_ZAP_DEAD_LOCALS | 127 # ifdef ENABLE_ZAP_DEAD_LOCALS |
128 if ( ZapDeadCompiledLocals ) Insert_zap_nodes(); | 128 if ( ZapDeadCompiledLocals ) Insert_zap_nodes(); |
129 # endif | 129 # endif |
130 | 130 |
131 uint* blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1); | |
132 blk_starts[0] = 0; | |
133 | |
134 // Initialize code buffer and process short branches. | |
135 CodeBuffer* cb = init_buffer(blk_starts); | |
136 | |
137 if (cb == NULL || failing()) return; | |
138 | |
131 ScheduleAndBundle(); | 139 ScheduleAndBundle(); |
132 | 140 |
133 #ifndef PRODUCT | 141 #ifndef PRODUCT |
134 if (trace_opto_output()) { | 142 if (trace_opto_output()) { |
135 tty->print("\n---- After ScheduleAndBundle ----\n"); | 143 tty->print("\n---- After ScheduleAndBundle ----\n"); |
146 } | 154 } |
147 #endif | 155 #endif |
148 | 156 |
149 if (failing()) return; | 157 if (failing()) return; |
150 | 158 |
159 finalize_offsets_and_shorten(blk_starts); | |
160 | |
151 BuildOopMaps(); | 161 BuildOopMaps(); |
152 | 162 |
153 if (failing()) return; | 163 if (failing()) return; |
154 | 164 |
155 Fill_buffer(); | 165 fill_buffer(cb, blk_starts); |
156 } | 166 } |
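The hunk above reorders the output phases: `init_buffer()` (which now runs `shorten_branches()`) executes before `ScheduleAndBundle()`, and `finalize_offsets_and_shorten()` pins down block offsets afterwards, so scheduling can only shrink code relative to the pessimistic estimate. A minimal sketch of the new phase ordering, with simplified, hypothetical signatures standing in for the HotSpot ones:

```cpp
// Sketch only: the reordered output pipeline (hypothetical simplified API).
void output(Compile* C) {
  uint* blk_starts = new uint[C->num_blocks() + 1]; // pessimistic block offsets
  blk_starts[0] = 0;

  CodeBuffer* cb = C->init_buffer(blk_starts);      // sizes buffer, runs shorten_branches()
  if (cb == nullptr || C->failing()) return;

  C->ScheduleAndBundle();                           // may insert nops / reorder within blocks
  if (C->failing()) return;

  C->finalize_offsets_and_shorten(blk_starts);      // offsets may only shrink from here on
  C->BuildOopMaps();
  if (C->failing()) return;

  C->fill_buffer(cb, blk_starts);                   // emission must land exactly on blk_starts
}
```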
157 | 167 |
158 bool Compile::need_stack_bang(int frame_size_in_bytes) const { | 168 bool Compile::need_stack_bang(int frame_size_in_bytes) const { |
159 // Determine if we need to generate a stack overflow check. | 169 // Determine if we need to generate a stack overflow check. |
160 // Do it if the method is not a stub function and | 170 // Do it if the method is not a stub function and |
323 } // if( b->head()->is_Loop() ) | 333 } // if( b->head()->is_Loop() ) |
324 } // for( i <= last_block ) | 334 } // for( i <= last_block ) |
325 } // if( MaxLoopPad < OptoLoopAlignment-1 ) | 335 } // if( MaxLoopPad < OptoLoopAlignment-1 ) |
326 } | 336 } |
327 | 337 |
328 //----------------------Shorten_branches--------------------------------------- | 338 //----------------------shorten_branches--------------------------------------- |
329 // The architecture description provides short branch variants for some long | 339 // The architecture description provides short branch variants for some long |
330 // branch instructions. Replace eligible long branches with short branches. | 340 // branch instructions. Replace eligible long branches with short branches. |
331 void Compile::Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size) { | 341 void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) { |
332 | |
333 // fill in the nop array for bundling computations | |
334 MachNode *_nop_list[Bundle::_nop_count]; | |
335 Bundle::initialize_nops(_nop_list, this); | |
336 | 342 |
337 // ------------------ | 343 // ------------------ |
338 // Compute size of each block, method size, and relocation information size | 344 // Compute size of each block, method size, and relocation information size |
339 uint *jmp_end = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); | 345 uint nblocks = _cfg->_num_blocks; |
340 uint *blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1); | 346 |
341 DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); ) | 347 uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks); |
342 DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); ) | 348 uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks); |
343 blk_starts[0] = 0; | 349 int* jmp_nidx = NEW_RESOURCE_ARRAY(int ,nblocks); |
350 DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks); ) | |
351 DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks); ) | |
352 | |
353 bool has_short_branch_candidate = false; | |
344 | 354 |
345 // Initialize the sizes to 0 | 355 // Initialize the sizes to 0 |
346 code_size = 0; // Size in bytes of generated code | 356 code_size = 0; // Size in bytes of generated code |
347 stub_size = 0; // Size in bytes of all stub entries | 357 stub_size = 0; // Size in bytes of all stub entries |
348 // Size in bytes of all relocation entries, including those in local stubs. | 358 // Size in bytes of all relocation entries, including those in local stubs. |
349 // Start with 2-bytes of reloc info for the unvalidated entry point | 359 // Start with 2-bytes of reloc info for the unvalidated entry point |
350 reloc_size = 1; // Number of relocation entries | 360 reloc_size = 1; // Number of relocation entries |
351 | 361 |
352 // Make three passes. The first computes pessimistic blk_starts, | 362 // Make three passes. The first computes pessimistic blk_starts, |
353 // relative jmp_end and reloc_size information. The second performs | 363 // relative jmp_offset and reloc_size information. The second performs |
354 // short branch substitution using the pessimistic sizing. The | 364 // short branch substitution using the pessimistic sizing. The |
355 // third inserts nops where needed. | 365 // third inserts nops where needed. |
356 | 366 |
357 Node *nj; // tmp | |
358 | |
359 // Step one, perform a pessimistic sizing pass. | 367 // Step one, perform a pessimistic sizing pass. |
360 uint i; | 368 uint last_call_adr = max_uint; |
361 uint min_offset_from_last_call = 1; // init to a positive value | 369 uint last_avoid_back_to_back_adr = max_uint; |
362 uint nop_size = (new (this) MachNopNode())->size(_regalloc); | 370 uint nop_size = (new (this) MachNopNode())->size(_regalloc); |
363 for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks | 371 for (uint i = 0; i < nblocks; i++) { // For all blocks |
364 Block *b = _cfg->_blocks[i]; | 372 Block *b = _cfg->_blocks[i]; |
373 | |
374 // During short branch replacement, we store the relative (to blk_starts) | |
375 // offset of the jump in jmp_offset, rather than its absolute offset. |
376 // This is so that we do not need to recompute sizes of all nodes when | |
377 // we compute correct blk_starts in our next sizing pass. | |
378 jmp_offset[i] = 0; | |
379 jmp_size[i] = 0; | |
380 jmp_nidx[i] = -1; | |
381 DEBUG_ONLY( jmp_target[i] = 0; ) | |
382 DEBUG_ONLY( jmp_rule[i] = 0; ) | |
365 | 383 |
366 // Sum all instruction sizes to compute block size | 384 // Sum all instruction sizes to compute block size |
367 uint last_inst = b->_nodes.size(); | 385 uint last_inst = b->_nodes.size(); |
368 uint blk_size = 0; | 386 uint blk_size = 0; |
369 for( uint j = 0; j<last_inst; j++ ) { | 387 for (uint j = 0; j < last_inst; j++) { |
370 nj = b->_nodes[j]; | 388 Node* nj = b->_nodes[j]; |
371 uint inst_size = nj->size(_regalloc); | 389 uint inst_size = nj->size(_regalloc); |
372 blk_size += inst_size; | |
373 // Handle machine instruction nodes | 390 // Handle machine instruction nodes |
374 if( nj->is_Mach() ) { | 391 if (nj->is_Mach()) { |
375 MachNode *mach = nj->as_Mach(); | 392 MachNode *mach = nj->as_Mach(); |
376 blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding | 393 blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding |
377 reloc_size += mach->reloc(); | 394 reloc_size += mach->reloc(); |
378 if( mach->is_MachCall() ) { | 395 if( mach->is_MachCall() ) { |
379 MachCallNode *mcall = mach->as_MachCall(); | 396 MachCallNode *mcall = mach->as_MachCall(); |
386 reloc_size += reloc_java_to_interp(); | 403 reloc_size += reloc_java_to_interp(); |
387 } | 404 } |
388 } else if (mach->is_MachSafePoint()) { | 405 } else if (mach->is_MachSafePoint()) { |
389 // If call/safepoint are adjacent, account for possible | 406 // If call/safepoint are adjacent, account for possible |
390 // nop to disambiguate the two safepoints. | 407 // nop to disambiguate the two safepoints. |
391 if (min_offset_from_last_call == 0) { | 408 // ScheduleAndBundle() can rearrange nodes in a block, |
409 // so check all offsets inside this block. |
410 if (last_call_adr >= blk_starts[i]) { | |
392 blk_size += nop_size; | 411 blk_size += nop_size; |
393 } | 412 } |
394 } | 413 } |
395 } | 414 if (mach->avoid_back_to_back()) { |
396 min_offset_from_last_call += inst_size; | 415 // Nop is inserted between "avoid back to back" instructions. |
416 // ScheduleAndBundle() can rearrange nodes in a block, | |
417 // so check all offsets inside this block. |
418 if (last_avoid_back_to_back_adr >= blk_starts[i]) { | |
419 blk_size += nop_size; | |
420 } | |
421 } | |
422 if (mach->may_be_short_branch()) { | |
423 if (!nj->is_Branch()) { | |
424 #ifndef PRODUCT | |
425 nj->dump(3); | |
426 #endif | |
427 Unimplemented(); | |
428 } | |
429 assert(jmp_nidx[i] == -1, "block should have only one branch"); | |
430 jmp_offset[i] = blk_size; | |
431 jmp_size[i] = inst_size; | |
432 jmp_nidx[i] = j; | |
433 has_short_branch_candidate = true; | |
434 } | |
435 } | |
436 blk_size += inst_size; | |
397 // Remember end of call offset | 437 // Remember end of call offset |
398 if (nj->is_MachCall() && !nj->is_MachCallLeaf()) { | 438 if (nj->is_MachCall() && !nj->is_MachCallLeaf()) { |
399 min_offset_from_last_call = 0; | 439 last_call_adr = blk_starts[i]+blk_size; |
400 } | 440 } |
401 } | 441 // Remember end of avoid_back_to_back offset |
402 | 442 if (nj->is_Mach() && nj->as_Mach()->avoid_back_to_back()) { |
403 // During short branch replacement, we store the relative (to blk_starts) | 443 last_avoid_back_to_back_adr = blk_starts[i]+blk_size; |
404 // end of jump in jmp_end, rather than the absolute end of jump. This | 444 } |
405 // is so that we do not need to recompute sizes of all nodes when we compute | 445 } |
406 // correct blk_starts in our next sizing pass. | |
407 jmp_end[i] = blk_size; | |
408 DEBUG_ONLY( jmp_target[i] = 0; ) | |
409 | 446 |
410 // When the next block starts a loop, we may insert pad NOP | 447 // When the next block starts a loop, we may insert pad NOP |
411 // instructions. Since we cannot know our future alignment, | 448 // instructions. Since we cannot know our future alignment, |
412 // assume the worst. | 449 // assume the worst. |
413 if( i<_cfg->_num_blocks-1 ) { | 450 if (i< nblocks-1) { |
414 Block *nb = _cfg->_blocks[i+1]; | 451 Block *nb = _cfg->_blocks[i+1]; |
415 int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit(); | 452 int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit(); |
416 if( max_loop_pad > 0 ) { | 453 if (max_loop_pad > 0) { |
417 assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), ""); | 454 assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), ""); |
418 blk_size += max_loop_pad; | 455 blk_size += max_loop_pad; |
419 } | 456 } |
420 } | 457 } |
421 | 458 |
422 // Save block size; update total method size | 459 // Save block size; update total method size |
423 blk_starts[i+1] = blk_starts[i]+blk_size; | 460 blk_starts[i+1] = blk_starts[i]+blk_size; |
424 } | 461 } |
425 | 462 |
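Step one above is a single pessimistic sizing pass: every branch is counted at its long form, worst-case alignment padding is assumed before loop heads, and a nop is budgeted wherever a call/safepoint pair or two avoid_back_to_back instructions could end up adjacent after scheduling. A hedged sketch of that accounting, with an invented, simplified node model:

```cpp
#include <vector>

// Pessimistic sizing sketch (invented, simplified node model).
struct Inst {
  int worst_size;     // long branch form + worst-case alignment padding
  bool is_safepoint;  // includes calls in this toy model
  bool is_call;
};

// Size one block assuming the worst; a nop is budgeted wherever a
// call/safepoint pair could end up adjacent after scheduling.
int size_block_pessimistically(const std::vector<Inst>& block, int block_start,
                               int& last_call_end, int nop_size, int max_loop_pad) {
  int size = 0;
  for (const Inst& n : block) {
    if (n.is_safepoint && last_call_end >= block_start)
      size += nop_size;                    // disambiguate adjacent safepoints
    size += n.worst_size;
    if (n.is_call)
      last_call_end = block_start + size;  // remember where the call ends
  }
  return size + max_loop_pad;              // worst padding before a following loop head
}
```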
426 // Step two, replace eligible long jumps. | 463 // Step two, replace eligible long jumps. |
427 | 464 bool progress = true; |
428 // Note: this will only get the long branches within short branch | 465 uint last_may_be_short_branch_adr = max_uint; |
429 // range. Another pass might detect more branches that became | 466 while (has_short_branch_candidate && progress) { |
430 // candidates because the shortening in the first pass exposed | 467 progress = false; |
431 // more opportunities. Unfortunately, this would require | 468 has_short_branch_candidate = false; |
432 // recomputing the starting and ending positions for the blocks | 469 int adjust_block_start = 0; |
433 for( i=0; i<_cfg->_num_blocks; i++ ) { | 470 for (uint i = 0; i < nblocks; i++) { |
434 Block *b = _cfg->_blocks[i]; | 471 Block *b = _cfg->_blocks[i]; |
435 | 472 int idx = jmp_nidx[i]; |
436 int j; | 473 MachNode* mach = (idx == -1) ? NULL: b->_nodes[idx]->as_Mach(); |
437 // Find the branch; ignore trailing NOPs. | 474 if (mach != NULL && mach->may_be_short_branch()) { |
438 for( j = b->_nodes.size()-1; j>=0; j-- ) { | 475 #ifdef ASSERT |
439 nj = b->_nodes[j]; | 476 assert(jmp_size[i] > 0 && mach->is_Branch(), "sanity"); |
440 if( !nj->is_Mach() || nj->as_Mach()->ideal_Opcode() != Op_Con ) | 477 int j; |
441 break; | 478 // Find the branch; ignore trailing NOPs. |
442 } | 479 for (j = b->_nodes.size()-1; j>=0; j--) { |
443 | 480 Node* n = b->_nodes[j]; |
444 if (j >= 0) { | 481 if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) |
445 if( nj->is_Mach() && nj->as_Mach()->may_be_short_branch() ) { | 482 break; |
446 MachNode *mach = nj->as_Mach(); | 483 } |
484 assert(j >= 0 && j == idx && b->_nodes[j] == (Node*)mach, "sanity"); | |
485 #endif | |
486 int br_size = jmp_size[i]; | |
487 int br_offs = blk_starts[i] + jmp_offset[i]; | |
488 | |
447 // This requires the TRUE branch target be in succs[0] | 489 // This requires the TRUE branch target be in succs[0] |
448 uint bnum = b->non_connector_successor(0)->_pre_order; | 490 uint bnum = b->non_connector_successor(0)->_pre_order; |
449 uintptr_t target = blk_starts[bnum]; | 491 int offset = blk_starts[bnum] - br_offs; |
450 if( mach->is_Branch() ) { | 492 if (bnum > i) { // adjust following block's offset |
451 int offset = target-(blk_starts[i] + jmp_end[i]); | 493 offset -= adjust_block_start; |
452 if (_matcher->is_short_branch_offset(mach->rule(), offset)) { | 494 } |
453 // We've got a winner. Replace this branch. | 495 // In the following code a nop could be inserted before |
454 MachNode* replacement = mach->short_branch_version(this); | 496 // the branch which will increase the backward distance. |
455 b->_nodes.map(j, replacement); | 497 bool needs_padding = ((uint)br_offs == last_may_be_short_branch_adr); |
456 mach->subsume_by(replacement); | 498 if (needs_padding && offset <= 0) |
457 | 499 offset -= nop_size; |
458 // Update the jmp_end size to save time in our | 500 |
459 // next pass. | 501 if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) { |
460 jmp_end[i] -= (mach->size(_regalloc) - replacement->size(_regalloc)); | 502 // We've got a winner. Replace this branch. |
461 DEBUG_ONLY( jmp_target[i] = bnum; ); | 503 MachNode* replacement = mach->short_branch_version(this); |
462 DEBUG_ONLY( jmp_rule[i] = mach->rule(); ); | 504 |
505 // Update the jmp_size. | |
506 int new_size = replacement->size(_regalloc); | |
507 int diff = br_size - new_size; | |
508 assert(diff >= (int)nop_size, "short_branch size should be smaller"); | |
509 // Conservatively take into account padding between |
510 // avoid_back_to_back branches. The previous branch could be |
511 // converted into an avoid_back_to_back branch during later |
512 // rounds. |
513 if (needs_padding && replacement->avoid_back_to_back()) { | |
514 jmp_offset[i] += nop_size; | |
515 diff -= nop_size; | |
463 } | 516 } |
517 adjust_block_start += diff; | |
518 b->_nodes.map(idx, replacement); | |
519 mach->subsume_by(replacement); | |
520 mach = replacement; | |
521 progress = true; | |
522 | |
523 jmp_size[i] = new_size; | |
524 DEBUG_ONLY( jmp_target[i] = bnum; ); | |
525 DEBUG_ONLY( jmp_rule[i] = mach->rule(); ); | |
464 } else { | 526 } else { |
465 #ifndef PRODUCT | 527 // The jump distance is not short, try again during next iteration. |
466 mach->dump(3); | 528 has_short_branch_candidate = true; |
467 #endif | |
468 Unimplemented(); | |
469 } | 529 } |
470 } | 530 } // (mach->may_be_short_branch()) |
471 } | 531 if (mach != NULL && (mach->may_be_short_branch() || |
472 } | 532 mach->avoid_back_to_back())) { |
533 last_may_be_short_branch_adr = blk_starts[i] + jmp_offset[i] + jmp_size[i]; | |
534 } | |
535 blk_starts[i+1] -= adjust_block_start; | |
536 } | |
537 } | |
538 | |
539 #ifdef ASSERT | |
540 for (uint i = 0; i < nblocks; i++) { // For all blocks | |
541 if (jmp_target[i] != 0) { | |
542 int br_size = jmp_size[i]; | |
543 int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]); | |
544 if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) { | |
545 tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]); | |
546 } | |
547 assert(_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset), "Displacement too large for short jmp"); | |
548 } | |
549 } | |
550 #endif | |
551 | |
552 // Step 3, computing the offsets of all blocks, is done in finalize_offsets_and_shorten() |
553 // after ScheduleAndBundle(). |
554 | |
555 // ------------------ | |
556 // Compute size for code buffer | |
557 code_size = blk_starts[nblocks]; | |
558 | |
559 // Relocation records | |
560 reloc_size += 1; // Reloc entry for the exception handler |
561 | |
562 // Adjust reloc_size to the number of relocation records |
563 // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for | |
564 // a relocation index. | |
565 // The CodeBuffer will expand the locs array if this estimate is too low. | |
566 reloc_size *= 10 / sizeof(relocInfo); | |
567 } | |
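Step two is branch relaxation run to a fixed point: converting one long branch to its short variant shrinks the block, which can pull other branches into short range, so the loop repeats while `progress` is set and candidates remain. Below is a self-contained sketch of the idea, not the HotSpot API; the ±1024-byte displacement is an invented stand-in for a limited-range encoding such as T4's cbcond:

```cpp
#include <vector>

// Branch relaxation sketch: iterate long->short conversions to a fixed point.
struct Branch { int block; int target; bool is_short; };

// Hypothetical encodability test for a short, limited-displacement encoding.
static bool fits_short(int offset) { return offset > -1024 && offset < 1024; }

void relax(std::vector<int>& blk_starts,   // blk_starts[i] = byte offset of block i
           std::vector<Branch>& branches, int long_size, int short_size) {
  bool progress = true;
  while (progress) {
    progress = false;
    for (Branch& br : branches) {
      if (br.is_short) continue;
      int offset = blk_starts[br.target] - blk_starts[br.block];
      if (fits_short(offset)) {
        br.is_short = true;
        progress = true;
        // Shrinking this block shifts all later blocks down, possibly
        // bringing further branches into short range on the next pass.
        for (size_t i = br.block + 1; i < blk_starts.size(); i++)
          blk_starts[i] -= (long_size - short_size);
      }
    }
  }
}
```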
568 | |
569 //----------------------finalize_offsets_and_shorten------------------------- | |
570 void Compile::finalize_offsets_and_shorten(uint* blk_starts) { | |
571 // blk_starts[] contains offsets calculated during short branches processing, | |
572 // offsets should not be increased during following steps. | |
473 | 573 |
474 // Compute the size of first NumberOfLoopInstrToAlign instructions at head | 574 // Compute the size of first NumberOfLoopInstrToAlign instructions at head |
475 // of a loop. It is used to determine the padding for loop alignment. | 575 // of a loop. It is used to determine the padding for loop alignment. |
476 compute_loop_first_inst_sizes(); | 576 compute_loop_first_inst_sizes(); |
477 | 577 |
478 // Step 3, compute the offsets of all the labels | 578 uint nblocks = _cfg->_num_blocks; |
579 #ifdef ASSERT | |
580 uint* jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks); | |
581 uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks); | |
582 uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks); | |
583 uint* jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks); | |
584 #endif | |
585 | |
586 // Insert nops where needed and do the final short branch replacement. |
587 uint nop_size = (new (this) MachNopNode())->size(_regalloc); | |
479 uint last_call_adr = max_uint; | 588 uint last_call_adr = max_uint; |
480 for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks | 589 uint last_avoid_back_to_back_adr = max_uint; |
481 // copy the offset of the beginning to the corresponding label | 590 |
482 assert(labels[i].is_unused(), "cannot patch at this point"); | 591 assert(blk_starts[0] == 0, "sanity"); |
483 labels[i].bind_loc(blk_starts[i], CodeBuffer::SECT_INSTS); | 592 uint current_offset = 0; |
484 | 593 uint block_alignment_padding = 0; |
485 // insert padding for any instructions that need it | 594 |
595 for (uint i=0; i < nblocks; i++) { // For all blocks | |
486 Block *b = _cfg->_blocks[i]; | 596 Block *b = _cfg->_blocks[i]; |
597 | |
598 #ifdef ASSERT | |
599 jmp_target[i] = 0; | |
600 jmp_offset[i] = 0; | |
601 jmp_size[i] = 0; | |
602 jmp_rule[i] = 0; | |
603 #endif | |
604 | |
605 // Maximum alignment was added before the loop block during |
606 // Step One; as a result, padding for nodes was not added. |
607 // Take this into account for the block's size change check |
608 // and allow the block's size to increase by the difference |
609 // between the maximum and actual alignment paddings. |
610 DEBUG_ONLY( uint orig_blk_size = blk_starts[i+1] - blk_starts[i] + block_alignment_padding; ) | |
611 uint blk_offset = current_offset; | |
612 | |
487 uint last_inst = b->_nodes.size(); | 613 uint last_inst = b->_nodes.size(); |
488 uint adr = blk_starts[i]; | 614 for (uint j = 0; j<last_inst; j++) { |
489 for( uint j = 0; j<last_inst; j++ ) { | 615 Node* nj = b->_nodes[j]; |
490 nj = b->_nodes[j]; | 616 |
491 if( nj->is_Mach() ) { | 617 if (valid_bundle_info(nj) && |
492 int padding = nj->as_Mach()->compute_padding(adr); | 618 node_bundling(nj)->used_in_unconditional_delay()) { |
619 continue; // Skip instruction in delay slot | |
620 } | |
621 | |
622 uint inst_size = nj->size(_regalloc); | |
623 if (nj->is_Mach()) { | |
624 MachNode *mach = nj->as_Mach(); | |
625 int padding = mach->compute_padding(current_offset); | |
626 | |
493 // If call/safepoint are adjacent insert a nop (5010568) | 627 // If call/safepoint are adjacent insert a nop (5010568) |
494 if (padding == 0 && nj->is_MachSafePoint() && !nj->is_MachCall() && | 628 if (padding == 0 && nj->is_MachSafePoint() && !nj->is_MachCall() && |
495 adr == last_call_adr ) { | 629 current_offset == last_call_adr) { |
496 padding = nop_size; | 630 padding = nop_size; |
497 } | 631 } |
498 if(padding > 0) { | 632 |
633 // Insert a nop between "avoid back to back" instructions. |
634 if (padding == 0 && mach->avoid_back_to_back() && | |
635 current_offset == last_avoid_back_to_back_adr) { | |
636 padding = nop_size; | |
637 } | |
638 | |
639 if (padding > 0) { | |
499 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); | 640 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); |
500 int nops_cnt = padding / nop_size; | 641 int nops_cnt = padding / nop_size; |
501 MachNode *nop = new (this) MachNopNode(nops_cnt); | 642 MachNode *nop = new (this) MachNopNode(nops_cnt); |
502 b->_nodes.insert(j++, nop); | 643 b->_nodes.insert(j++, nop); |
503 _cfg->_bbs.map( nop->_idx, b ); | 644 _cfg->_bbs.map(nop->_idx, b); |
504 adr += padding; | |
505 last_inst++; | 645 last_inst++; |
646 current_offset += padding; | |
506 } | 647 } |
507 } | 648 |
508 adr += nj->size(_regalloc); | 649 // Try to replace long branch if delay slot is not used, |
650 // this mostly helps back branches, since forward branch |
651 // distances are not updated yet. |
652 bool delay_slot_is_used = valid_bundle_info(nj) && | |
653 node_bundling(nj)->use_unconditional_delay(); | |
654 if (!delay_slot_is_used && mach->may_be_short_branch()) { | |
655 int br_size = inst_size; | |
656 | |
657 // This requires the TRUE branch target be in succs[0] | |
658 uint bnum = b->non_connector_successor(0)->_pre_order; | |
659 int offset = blk_starts[bnum] - current_offset; | |
660 if (bnum >= i) { | |
661 // The current and following blocks' offsets are not |
662 // finalized yet; adjust the distance. |
663 offset -= (blk_starts[i] - blk_offset); | |
664 } | |
665 // In the following code a nop could be inserted before | |
666 // the branch, which will increase the backward distance. |
667 bool needs_padding = (current_offset == last_avoid_back_to_back_adr); | |
668 if (needs_padding && offset <= 0) | |
669 offset -= nop_size; | |
670 | |
671 if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) { | |
672 // We've got a winner. Replace this branch. | |
673 MachNode* replacement = mach->short_branch_version(this); | |
674 | |
675 // Update the jmp_size. | |
676 int new_size = replacement->size(_regalloc); | |
677 assert((br_size - new_size) >= (int)nop_size, "short_branch size should be smaller"); | |
678 // Conservatively take into account padding between |
679 // avoid_back_to_back branches. The previous branch could be |
680 // converted into an avoid_back_to_back branch during later |
681 // rounds. |
682 if (needs_padding && replacement->avoid_back_to_back()) { | |
683 MachNode *nop = new (this) MachNopNode(); | |
684 b->_nodes.insert(j++, nop); | |
685 _cfg->_bbs.map(nop->_idx, b); | |
686 last_inst++; | |
687 current_offset += nop_size; | |
688 } | |
689 inst_size = new_size; | |
690 b->_nodes.map(j, replacement); | |
691 mach->subsume_by(replacement); | |
692 nj = replacement; | |
693 #ifdef ASSERT | |
694 jmp_target[i] = bnum; | |
695 jmp_offset[i] = current_offset - blk_offset; | |
696 jmp_size[i] = new_size; | |
697 jmp_rule[i] = mach->rule(); | |
698 #endif | |
699 } | |
700 } | |
701 } | |
702 current_offset += inst_size; | |
509 | 703 |
510 // Remember end of call offset | 704 // Remember end of call offset |
511 if (nj->is_MachCall() && !nj->is_MachCallLeaf()) { | 705 if (nj->is_MachCall() && !nj->is_MachCallLeaf()) { |
512 last_call_adr = adr; | 706 last_call_adr = current_offset; |
513 } | 707 } |
514 } | 708 // Remember end of avoid_back_to_back offset |
515 | 709 if (nj->is_Mach() && nj->as_Mach()->avoid_back_to_back()) { |
516 if ( i != _cfg->_num_blocks-1) { | 710 last_avoid_back_to_back_adr = current_offset; |
517 // Get the size of the block | 711 } |
518 uint blk_size = adr - blk_starts[i]; | 712 } |
519 | 713 assert(blk_offset <= blk_starts[i], "shouldn't increase distance"); |
520 // When the next block is the top of a loop, we may insert pad NOP | 714 blk_starts[i] = blk_offset; |
521 // instructions. | 715 |
716 // When the next block is the top of a loop, we may insert pad NOP | |
717 // instructions. | |
718 if (i < nblocks-1) { | |
522 Block *nb = _cfg->_blocks[i+1]; | 719 Block *nb = _cfg->_blocks[i+1]; |
523 int current_offset = blk_starts[i] + blk_size; | 720 int padding = nb->alignment_padding(current_offset); |
524 current_offset += nb->alignment_padding(current_offset); | 721 if (padding > 0) { |
525 // Save block size; update total method size | 722 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); |
526 blk_starts[i+1] = current_offset; | 723 int nops_cnt = padding / nop_size; |
527 } | 724 MachNode *nop = new (this) MachNopNode(nops_cnt); |
528 } | 725 b->_nodes.insert(b->_nodes.size(), nop); |
726 _cfg->_bbs.map(nop->_idx, b); | |
727 current_offset += padding; | |
728 } | |
729 int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit(); | |
730 assert(max_loop_pad >= padding, "sanity"); | |
731 block_alignment_padding = max_loop_pad - padding; | |
732 } | |
733 assert(orig_blk_size >= (current_offset - blk_offset), "shouldn't increase block size"); | |
734 } | |
735 blk_starts[nblocks] = current_offset; | |
529 | 736 |
530 #ifdef ASSERT | 737 #ifdef ASSERT |
531 for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks | 738 for (uint i = 0; i < nblocks; i++) { // For all blocks |
532 if( jmp_target[i] != 0 ) { | 739 if (jmp_target[i] != 0) { |
533 int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_end[i]); | 740 int br_size = jmp_size[i]; |
534 if (!_matcher->is_short_branch_offset(jmp_rule[i], offset)) { | 741 int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]); |
535 tty->print_cr("target (%d) - jmp_end(%d) = offset (%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_end[i], offset, i, jmp_target[i]); | 742 if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) { |
536 } | 743 tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]); |
537 assert(_matcher->is_short_branch_offset(jmp_rule[i], offset), "Displacement too large for short jmp"); | 744 } |
538 } | 745 assert(_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset), "Displacement too large for short jmp"); |
539 } | 746 } |
540 #endif | 747 } |
541 | 748 #endif |
542 // ------------------ | |
543 // Compute size for code buffer | |
544 code_size = blk_starts[i-1] + jmp_end[i-1]; | |
545 | |
546 // Relocation records | |
547 reloc_size += 1; // Relo entry for exception handler | |
548 | |
549 // Adjust reloc_size to number of record of relocation info | |
550 // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for | |
551 // a relocation index. | |
552 // The CodeBuffer will expand the locs array if this estimate is too low. | |
553 reloc_size *= 10 / sizeof(relocInfo); | |
554 } | 749 } |
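One subtlety shared by both replacement passes above: when a nop may still be inserted immediately before the branch (avoid_back_to_back padding), a backward branch gets pushed one nop further from its target, so the code widens the tested offset before committing to the short form. A toy illustration of that check, with invented names:

```cpp
// Toy check (invented names): does the short encoding still fit when a
// nop may be inserted just before a backward branch?
bool short_form_fits(int target_offset, int branch_offset, bool may_pad,
                     int nop_size, bool (*is_short)(int offset)) {
  int offset = target_offset - branch_offset;  // negative for backward branches
  if (may_pad && offset <= 0)
    offset -= nop_size;                        // the nop moves the branch further away
  return is_short(offset);
}
```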
555 | 750 |
556 //------------------------------FillLocArray----------------------------------- | 751 //------------------------------FillLocArray----------------------------------- |
557 // Create a bit of debug info and append it to the array. The mapping is from | 752 // Create a bit of debug info and append it to the array. The mapping is from |
558 // Java local or expression stack to constant, register or stack-slot. For | 753 // Java local or expression stack to constant, register or stack-slot. For |
1024 debug_info->end_non_safepoint(pc_offset); | 1219 debug_info->end_non_safepoint(pc_offset); |
1025 } | 1220 } |
1026 | 1221 |
1027 | 1222 |
1028 | 1223 |
1029 // helper for Fill_buffer bailout logic | 1224 // helper for fill_buffer bailout logic |
1030 static void turn_off_compiler(Compile* C) { | 1225 static void turn_off_compiler(Compile* C) { |
1031 if (CodeCache::largest_free_block() >= CodeCacheMinimumFreeSpace*10) { | 1226 if (CodeCache::largest_free_block() >= CodeCacheMinimumFreeSpace*10) { |
1032 // Do not turn off compilation if a single giant method has | 1227 // Do not turn off compilation if a single giant method has |
1033 // blown the code cache size. | 1228 // blown the code cache size. |
1034 C->record_failure("excessive request to CodeCache"); | 1229 C->record_failure("excessive request to CodeCache"); |
1037 C->record_failure("CodeCache is full"); | 1232 C->record_failure("CodeCache is full"); |
1038 } | 1233 } |
1039 } | 1234 } |
1040 | 1235 |
1041 | 1236 |
1042 //------------------------------Fill_buffer------------------------------------ | 1237 //------------------------------init_buffer------------------------------------ |
1043 void Compile::Fill_buffer() { | 1238 CodeBuffer* Compile::init_buffer(uint* blk_starts) { |
1044 | 1239 |
1045 // Set the initially allocated size | 1240 // Set the initially allocated size |
1046 int code_req = initial_code_capacity; | 1241 int code_req = initial_code_capacity; |
1047 int locs_req = initial_locs_capacity; | 1242 int locs_req = initial_locs_capacity; |
1048 int stub_req = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity; | 1243 int stub_req = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity; |
1049 int const_req = initial_const_capacity; | 1244 int const_req = initial_const_capacity; |
1050 bool labels_not_set = true; | |
1051 | 1245 |
1052 int pad_req = NativeCall::instruction_size; | 1246 int pad_req = NativeCall::instruction_size; |
1053 // The extra spacing after the code is necessary on some platforms. | 1247 // The extra spacing after the code is necessary on some platforms. |
1054 // Sometimes we need to patch in a jump after the last instruction, | 1248 // Sometimes we need to patch in a jump after the last instruction, |
1055 // if the nmethod has been deoptimized. (See 4932387, 4894843.) | 1249 // if the nmethod has been deoptimized. (See 4932387, 4894843.) |
1056 | 1250 |
1057 uint i; | |
1058 // Compute the byte offset where we can store the deopt pc. | 1251 // Compute the byte offset where we can store the deopt pc. |
1059 if (fixed_slots() != 0) { | 1252 if (fixed_slots() != 0) { |
1060 _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot)); | 1253 _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot)); |
1061 } | 1254 } |
1062 | 1255 |
1076 // get rid of this hack and have SpillCopy generate stfspill/ldffill | 1269 // get rid of this hack and have SpillCopy generate stfspill/ldffill |
1077 // instead of stfd/stfs/ldfd/ldfs. | 1270 // instead of stfd/stfs/ldfd/ldfs. |
1078 _frame_slots += 8*(16/BytesPerInt); | 1271 _frame_slots += 8*(16/BytesPerInt); |
1079 } | 1272 } |
1080 #endif | 1273 #endif |
1081 assert( _frame_slots >= 0 && _frame_slots < 1000000, "sanity check" ); | 1274 assert(_frame_slots >= 0 && _frame_slots < 1000000, "sanity check"); |
1082 | |
1083 // Create an array of unused labels, one for each basic block | |
1084 Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1); | |
1085 | |
1086 for( i=0; i <= _cfg->_num_blocks; i++ ) { | |
1087 blk_labels[i].init(); | |
1088 } | |
1089 | 1275 |
1090 if (has_mach_constant_base_node()) { | 1276 if (has_mach_constant_base_node()) { |
1091 // Fill the constant table. | 1277 // Fill the constant table. |
1092 // Note: This must happen before Shorten_branches. | 1278 // Note: This must happen before shorten_branches. |
1093 for (i = 0; i < _cfg->_num_blocks; i++) { | 1279 for (uint i = 0; i < _cfg->_num_blocks; i++) { |
1094 Block* b = _cfg->_blocks[i]; | 1280 Block* b = _cfg->_blocks[i]; |
1095 | 1281 |
1096 for (uint j = 0; j < b->_nodes.size(); j++) { | 1282 for (uint j = 0; j < b->_nodes.size(); j++) { |
1097 Node* n = b->_nodes[j]; | 1283 Node* n = b->_nodes[j]; |
1098 | 1284 |
1112 } | 1298 } |
1113 | 1299 |
1114 // Initialize the space for the BufferBlob used to find and verify | 1300 // Initialize the space for the BufferBlob used to find and verify |
1115 // instruction size in MachNode::emit_size() | 1301 // instruction size in MachNode::emit_size() |
1116 init_scratch_buffer_blob(const_req); | 1302 init_scratch_buffer_blob(const_req); |
1117 if (failing()) return; // Out of memory | 1303 if (failing()) return NULL; // Out of memory |
1118 | 1304 |
1119 // If this machine supports different size branch offsets, then pre-compute | 1305 // Pre-compute the length of blocks and replace |
1120 // the length of the blocks | 1306 // long branches with short if machine supports it. |
1121 if( _matcher->is_short_branch_offset(-1, 0) ) { | 1307 shorten_branches(blk_starts, code_req, locs_req, stub_req); |
1122 Shorten_branches(blk_labels, code_req, locs_req, stub_req); | |
1123 labels_not_set = false; | |
1124 } | |
1125 | 1308 |
1126 // nmethod and CodeBuffer count stubs & constants as part of method's code. | 1309 // nmethod and CodeBuffer count stubs & constants as part of method's code. |
1127 int exception_handler_req = size_exception_handler(); | 1310 int exception_handler_req = size_exception_handler(); |
1128 int deopt_handler_req = size_deopt_handler(); | 1311 int deopt_handler_req = size_deopt_handler(); |
1129 exception_handler_req += MAX_stubs_size; // add marginal slop for handler | 1312 exception_handler_req += MAX_stubs_size; // add marginal slop for handler |
1149 cb->initialize(total_req, locs_req); | 1332 cb->initialize(total_req, locs_req); |
1150 | 1333 |
1151 // Have we run out of code space? | 1334 // Have we run out of code space? |
1152 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { | 1335 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { |
1153 turn_off_compiler(this); | 1336 turn_off_compiler(this); |
1154 return; | 1337 return NULL; |
1155 } | 1338 } |
1156 // Configure the code buffer. | 1339 // Configure the code buffer. |
1157 cb->initialize_consts_size(const_req); | 1340 cb->initialize_consts_size(const_req); |
1158 cb->initialize_stubs_size(stub_req); | 1341 cb->initialize_stubs_size(stub_req); |
1159 cb->initialize_oop_recorder(env()->oop_recorder()); | 1342 cb->initialize_oop_recorder(env()->oop_recorder()); |
1160 | 1343 |
1161 // fill in the nop array for bundling computations | 1344 // fill in the nop array for bundling computations |
1162 MachNode *_nop_list[Bundle::_nop_count]; | 1345 MachNode *_nop_list[Bundle::_nop_count]; |
1163 Bundle::initialize_nops(_nop_list, this); | 1346 Bundle::initialize_nops(_nop_list, this); |
1164 | 1347 |
1348 return cb; | |
1349 } | |
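init_buffer() now owns the budgeting that used to sit at the top of Fill_buffer(): it sums the code, stub, constant, handler, and padding requests, allocates one blob from the code cache, and returns NULL (after turn_off_compiler()) so Output() can bail out early. A condensed sketch of that budgeting, assuming only the CodeBuffer calls already visible in this diff:

```cpp
// Condensed sketch of init_buffer()'s budgeting (HotSpot details elided).
CodeBuffer* make_buffer(CodeBuffer* cb, int code_req, int locs_req, int stub_req,
                        int const_req, int handler_req, int pad_req) {
  int total_req = const_req + code_req + pad_req + stub_req + handler_req;
  cb->initialize(total_req, locs_req);        // one allocation for all sections
  if (cb->blob() == nullptr) return nullptr;  // code cache exhausted: caller bails out
  cb->initialize_consts_size(const_req);      // then carve out the fixed sections
  cb->initialize_stubs_size(stub_req);
  return cb;
}
```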
1350 | |
1351 //------------------------------fill_buffer------------------------------------ | |
1352 void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { | |
1353 | |
1165 // Create oopmap set. | 1354 // Create oopmap set. |
1166 _oop_map_set = new OopMapSet(); | 1355 _oop_map_set = new OopMapSet(); |
1167 | 1356 |
1168 // !!!!! This preserves old handling of oopmaps for now | 1357 // !!!!! This preserves old handling of oopmaps for now |
1169 debug_info()->set_oopmaps(_oop_map_set); | 1358 debug_info()->set_oopmaps(_oop_map_set); |
1178 uint return_offset = 0; | 1367 uint return_offset = 0; |
1179 int nop_size = (new (this) MachNopNode())->size(_regalloc); | 1368 int nop_size = (new (this) MachNopNode())->size(_regalloc); |
1180 | 1369 |
1181 int previous_offset = 0; | 1370 int previous_offset = 0; |
1182 int current_offset = 0; | 1371 int current_offset = 0; |
1372 #ifdef ASSERT | |
1183 int last_call_offset = -1; | 1373 int last_call_offset = -1; |
1184 | 1374 int last_avoid_back_to_back_offset = -1; |
1375 #endif | |
1185 // Create an array of unused labels, one for each basic block, if printing is enabled | 1376 // Create an array of unused labels, one for each basic block, if printing is enabled |
1186 #ifndef PRODUCT | 1377 #ifndef PRODUCT |
1187 int *node_offsets = NULL; | 1378 int *node_offsets = NULL; |
1188 uint node_offset_limit = unique(); | 1379 uint node_offset_limit = unique(); |
1189 | 1380 |
1190 | 1381 if (print_assembly()) |
1191 if ( print_assembly() ) | |
1192 node_offsets = NEW_RESOURCE_ARRAY(int, node_offset_limit); | 1382 node_offsets = NEW_RESOURCE_ARRAY(int, node_offset_limit); |
1193 #endif | 1383 #endif |
1194 | 1384 |
1195 NonSafepointEmitter non_safepoints(this); // emit non-safepoints lazily | 1385 NonSafepointEmitter non_safepoints(this); // emit non-safepoints lazily |
1196 | 1386 |
1197 // Emit the constant table. | 1387 // Emit the constant table. |
1198 if (has_mach_constant_base_node()) { | 1388 if (has_mach_constant_base_node()) { |
1199 constant_table().emit(*cb); | 1389 constant_table().emit(*cb); |
1200 } | 1390 } |
1201 | 1391 |
1392 // Create an array of labels, one for each basic block | |
1393 Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1); | |
1394 for (uint i=0; i <= _cfg->_num_blocks; i++) { | |
1395 blk_labels[i].init(); | |
1396 } | |
1397 | |
1202 // ------------------ | 1398 // ------------------ |
1203 // Now fill in the code buffer | 1399 // Now fill in the code buffer |
1204 Node *delay_slot = NULL; | 1400 Node *delay_slot = NULL; |
1205 | 1401 |
1206 for( i=0; i < _cfg->_num_blocks; i++ ) { | 1402 for (uint i=0; i < _cfg->_num_blocks; i++) { |
1403 guarantee(blk_starts[i] == (uint)cb->insts_size(),"should not change size"); | |
1404 | |
1207 Block *b = _cfg->_blocks[i]; | 1405 Block *b = _cfg->_blocks[i]; |
1208 | 1406 |
1209 Node *head = b->head(); | 1407 Node *head = b->head(); |
1210 | 1408 |
1211 // If this block needs to start aligned (i.e, can be reached other | 1409 // If this block needs to start aligned (i.e, can be reached other |
1212 // than by falling-thru from the previous block), then force the | 1410 // than by falling-thru from the previous block), then force the |
1213 // start of a new bundle. | 1411 // start of a new bundle. |
1214 if( Pipeline::requires_bundling() && starts_bundle(head) ) | 1412 if (Pipeline::requires_bundling() && starts_bundle(head)) |
1215 cb->flush_bundle(true); | 1413 cb->flush_bundle(true); |
1216 | 1414 |
1415 #ifdef ASSERT | |
1416 if (!b->is_connector()) { | |
1417 stringStream st; | |
1418 b->dump_head(&_cfg->_bbs, &st); | |
1419 MacroAssembler(cb).block_comment(st.as_string()); | |
1420 } | |
1421 #endif | |
1422 | |
1217 // Define the label at the beginning of the basic block | 1423 // Define the label at the beginning of the basic block |
1218 if (labels_not_set) { | 1424 MacroAssembler(cb).bind(blk_labels[b->_pre_order]); |
1219 MacroAssembler(cb).bind(blk_labels[b->_pre_order]); | |
1220 } else { | |
1221 assert(blk_labels[b->_pre_order].loc_pos() == cb->insts_size(), | |
1222 err_msg("label position does not match code offset: %d != %d", | |
1223 blk_labels[b->_pre_order].loc_pos(), cb->insts_size())); | |
1224 } | |
1225 | 1425 |
1226 uint last_inst = b->_nodes.size(); | 1426 uint last_inst = b->_nodes.size(); |
1227 | 1427 |
1228 // Emit block normally, except for last instruction. | 1428 // Emit block normally, except for last instruction. |
1229 // Emit means "dump code bits into code buffer". | 1429 // Emit means "dump code bits into code buffer". |
1230 for( uint j = 0; j<last_inst; j++ ) { | 1430 for (uint j = 0; j<last_inst; j++) { |
1231 | 1431 |
1232 // Get the node | 1432 // Get the node |
1233 Node* n = b->_nodes[j]; | 1433 Node* n = b->_nodes[j]; |
1234 | 1434 |
1235 // See if delay slots are supported | 1435 // See if delay slots are supported |
1242 continue; | 1442 continue; |
1243 } | 1443 } |
1244 | 1444 |
1245 // If this starts a new instruction group, then flush the current one | 1445 // If this starts a new instruction group, then flush the current one |
1246 // (but allow split bundles) | 1446 // (but allow split bundles) |
1247 if( Pipeline::requires_bundling() && starts_bundle(n) ) | 1447 if (Pipeline::requires_bundling() && starts_bundle(n)) |
1248 cb->flush_bundle(false); | 1448 cb->flush_bundle(false); |
1249 | 1449 |
1250 // The following logic is duplicated in the code ifdeffed for | 1450 // The following logic is duplicated in the code ifdeffed for |
1251 // ENABLE_ZAP_DEAD_LOCALS which appears above in this file. It | 1451 // ENABLE_ZAP_DEAD_LOCALS which appears above in this file. It |
1252 // should be factored out. Or maybe dispersed to the nodes? | 1452 // should be factored out. Or maybe dispersed to the nodes? |
1253 | 1453 |
1254 // Special handling for SafePoint/Call Nodes | 1454 // Special handling for SafePoint/Call Nodes |
1255 bool is_mcall = false; | 1455 bool is_mcall = false; |
1256 if( n->is_Mach() ) { | 1456 if (n->is_Mach()) { |
1257 MachNode *mach = n->as_Mach(); | 1457 MachNode *mach = n->as_Mach(); |
1258 is_mcall = n->is_MachCall(); | 1458 is_mcall = n->is_MachCall(); |
1259 bool is_sfn = n->is_MachSafePoint(); | 1459 bool is_sfn = n->is_MachSafePoint(); |
1260 | 1460 |
1261 // If this requires all previous instructions be flushed, then do so | 1461 // If this requires all previous instructions be flushed, then do so |
1262 if( is_sfn || is_mcall || mach->alignment_required() != 1) { | 1462 if (is_sfn || is_mcall || mach->alignment_required() != 1) { |
1263 cb->flush_bundle(true); | 1463 cb->flush_bundle(true); |
1264 current_offset = cb->insts_size(); | 1464 current_offset = cb->insts_size(); |
1265 } | 1465 } |
1466 | |
1467 #ifdef ASSERT | |
1468 // Padding may be needed again since a previous instruction |
1469 // could be moved to a delay slot. |
1266 | 1470 |
1267 // align the instruction if necessary | 1471 // align the instruction if necessary |
1268 int padding = mach->compute_padding(current_offset); | 1472 int padding = mach->compute_padding(current_offset); |
1269 // Make sure safepoint node for polling is distinct from a call's | 1473 // Make sure safepoint node for polling is distinct from a call's |
1270 // return by adding a nop if needed. | 1474 // return by adding a nop if needed. |
1271 if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset ) { | 1475 if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset) { |
1272 padding = nop_size; | 1476 padding = nop_size; |
1273 } | 1477 } |
1274 assert( labels_not_set || padding == 0, "instruction should already be aligned"); | 1478 if (padding == 0 && mach->avoid_back_to_back() && |
1275 | 1479 current_offset == last_avoid_back_to_back_offset) { |
1276 if(padding > 0) { | 1480 // Avoid back-to-back instructions.
1277 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); | 1481 padding = nop_size; |
1278 int nops_cnt = padding / nop_size; | |
1279 MachNode *nop = new (this) MachNopNode(nops_cnt); | |
1280 b->_nodes.insert(j++, nop); | |
1281 last_inst++; | |
1282 _cfg->_bbs.map( nop->_idx, b ); | |
1283 nop->emit(*cb, _regalloc); | |
1284 cb->flush_bundle(true); | |
1285 current_offset = cb->insts_size(); | |
1286 } | 1482 } |
1287 | 1483 assert(padding == 0, "padding should be added already"); |
1484 #endif | |
1288 // Remember the start of the last call in a basic block | 1485 // Remember the start of the last call in a basic block |
1289 if (is_mcall) { | 1486 if (is_mcall) { |
1290 MachCallNode *mcall = mach->as_MachCall(); | 1487 MachCallNode *mcall = mach->as_MachCall(); |
1291 | 1488 |
1292 // This destination address is NOT PC-relative | 1489 // This destination address is NOT PC-relative |
1300 is_sfn = false; | 1497 is_sfn = false; |
1301 } | 1498 } |
1302 } | 1499 } |
1303 | 1500 |
1304 // sfn will be valid whenever mcall is valid now because of inheritance | 1501 // sfn will be valid whenever mcall is valid now because of inheritance |
1305 if( is_sfn || is_mcall ) { | 1502 if (is_sfn || is_mcall) { |
1306 | 1503 |
1307 // Handle special safepoint nodes for synchronization | 1504 // Handle special safepoint nodes for synchronization |
1308 if( !is_mcall ) { | 1505 if (!is_mcall) { |
1309 MachSafePointNode *sfn = mach->as_MachSafePoint(); | 1506 MachSafePointNode *sfn = mach->as_MachSafePoint(); |
1310 // !!!!! Stubs only need an oopmap right now, so bail out | 1507 // !!!!! Stubs only need an oopmap right now, so bail out |
1311 if( sfn->jvms()->method() == NULL) { | 1508 if (sfn->jvms()->method() == NULL) { |
1312 // Write the oopmap directly to the code blob??!! | 1509 // Write the oopmap directly to the code blob??!! |
1313 # ifdef ENABLE_ZAP_DEAD_LOCALS | 1510 # ifdef ENABLE_ZAP_DEAD_LOCALS |
1314 assert( !is_node_getting_a_safepoint(sfn), "logic does not match; false positive"); | 1511 assert( !is_node_getting_a_safepoint(sfn), "logic does not match; false positive"); |
1315 # endif | 1512 # endif |
1316 continue; | 1513 continue; |
1326 else if( mach->is_MachNullCheck() ) { | 1523 else if( mach->is_MachNullCheck() ) { |
1327 inct_starts[inct_cnt++] = previous_offset; | 1524 inct_starts[inct_cnt++] = previous_offset; |
1328 } | 1525 } |
1329 | 1526 |
1330 // If this is a branch, then fill in the label with the target BB's label | 1527 // If this is a branch, then fill in the label with the target BB's label |
1331 else if ( mach->is_Branch() ) { | 1528 else if (mach->is_Branch()) { |
1332 | 1529 |
1333 if ( mach->ideal_Opcode() == Op_Jump ) { | 1530 if (mach->ideal_Opcode() == Op_Jump) { |
1334 for (uint h = 0; h < b->_num_succs; h++ ) { | 1531 for (uint h = 0; h < b->_num_succs; h++) { |
1335 Block* succs_block = b->_succs[h]; | 1532 Block* succs_block = b->_succs[h]; |
1336 for (uint j = 1; j < succs_block->num_preds(); j++) { | 1533 for (uint j = 1; j < succs_block->num_preds(); j++) { |
1337 Node* jpn = succs_block->pred(j); | 1534 Node* jpn = succs_block->pred(j); |
1338 if ( jpn->is_JumpProj() && jpn->in(0) == mach ) { | 1535 if (jpn->is_JumpProj() && jpn->in(0) == mach) { |
1339 uint block_num = succs_block->non_connector()->_pre_order; | 1536 uint block_num = succs_block->non_connector()->_pre_order; |
1340 Label *blkLabel = &blk_labels[block_num]; | 1537 Label *blkLabel = &blk_labels[block_num]; |
1341 mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel); | 1538 mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel); |
1342 } | 1539 } |
1343 } | 1540 } |
1350 } | 1547 } |
1351 } | 1548 } |
1352 | 1549 |
1353 #ifdef ASSERT | 1550 #ifdef ASSERT |
1354 // Check that oop-store precedes the card-mark | 1551 // Check that oop-store precedes the card-mark |
1355 else if( mach->ideal_Opcode() == Op_StoreCM ) { | 1552 else if (mach->ideal_Opcode() == Op_StoreCM) { |
1356 uint storeCM_idx = j; | 1553 uint storeCM_idx = j; |
1357 int count = 0; | 1554 int count = 0; |
1358 for (uint prec = mach->req(); prec < mach->len(); prec++) { | 1555 for (uint prec = mach->req(); prec < mach->len(); prec++) { |
1359 Node *oop_store = mach->in(prec); // Precedence edge | 1556 Node *oop_store = mach->in(prec); // Precedence edge |
1360 if (oop_store == NULL) continue; | 1557 if (oop_store == NULL) continue; |
1369 } | 1566 } |
1370 assert(count > 0, "storeCM expects at least one precedence edge"); | 1567 assert(count > 0, "storeCM expects at least one precedence edge"); |
1371 } | 1568 } |
1372 #endif | 1569 #endif |
1373 | 1570 |
1374 else if( !n->is_Proj() ) { | 1571 else if (!n->is_Proj()) { |
1375 // Remember the beginning of the previous instruction, in case | 1572 // Remember the beginning of the previous instruction, in case |
1376 // it's followed by a flag-kill and a null-check. Happens on | 1573 // it's followed by a flag-kill and a null-check. Happens on |
1377 // Intel all the time, with add-to-memory kind of opcodes. | 1574 // Intel all the time, with add-to-memory kind of opcodes. |
1378 previous_offset = current_offset; | 1575 previous_offset = current_offset; |
1379 } | 1576 } |
1386 return; | 1583 return; |
1387 } | 1584 } |
1388 | 1585 |
1389 // Save the offset for the listing | 1586 // Save the offset for the listing |
1390 #ifndef PRODUCT | 1587 #ifndef PRODUCT |
1391 if( node_offsets && n->_idx < node_offset_limit ) | 1588 if (node_offsets && n->_idx < node_offset_limit) |
1392 node_offsets[n->_idx] = cb->insts_size(); | 1589 node_offsets[n->_idx] = cb->insts_size(); |
1393 #endif | 1590 #endif |
1394 | 1591 |
1395 // "Normal" instruction case | 1592 // "Normal" instruction case |
1593 DEBUG_ONLY( uint instr_offset = cb->insts_size(); ) | |
1396 n->emit(*cb, _regalloc); | 1594 n->emit(*cb, _regalloc); |
1397 current_offset = cb->insts_size(); | 1595 current_offset = cb->insts_size(); |
1596 | |
1597 #ifdef ASSERT | |
1598 if (n->size(_regalloc) != (current_offset-instr_offset)) { | |
1599 n->dump(); | |
1600 assert(n->size(_regalloc) == (current_offset-instr_offset), "wrong size of mach node"); | |
1601 } | |
1602 #endif | |
1398 non_safepoints.observe_instruction(n, current_offset); | 1603 non_safepoints.observe_instruction(n, current_offset); |
1399 | 1604 |
1605 #ifdef ASSERT | |
1400 // mcall is last "call" that can be a safepoint | 1606 // mcall is last "call" that can be a safepoint |
1401 // record it so we can see if a poll will directly follow it | 1607 // record it so we can see if a poll will directly follow it |
1402 // in which case we'll need a pad to make the PcDesc sites unique | 1608 // in which case we'll need a pad to make the PcDesc sites unique |
1403 // see 5010568. This can be slightly inaccurate but conservative | 1609 // see 5010568. This can be slightly inaccurate but conservative |
1404 // in the case that return address is not actually at current_offset. | 1610 // in the case that return address is not actually at current_offset. |
1406 | 1612 |
1407 if (is_mcall) { | 1613 if (is_mcall) { |
1408 last_call_offset = current_offset; | 1614 last_call_offset = current_offset; |
1409 } | 1615 } |
1410 | 1616 |
1617 if (n->is_Mach() && n->as_Mach()->avoid_back_to_back()) { | |
1618 // Avoid back-to-back instructions. |
1619 last_avoid_back_to_back_offset = current_offset; | |
1620 } | |
1621 #endif | |
1622 | |
1411 // See if this instruction has a delay slot | 1623 // See if this instruction has a delay slot |
1412 if ( valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) { | 1624 if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) { |
1413 assert(delay_slot != NULL, "expecting delay slot node"); | 1625 assert(delay_slot != NULL, "expecting delay slot node"); |
1414 | 1626 |
1415 // Back up 1 instruction | 1627 // Back up 1 instruction |
1416 cb->set_insts_end(cb->insts_end() - Pipeline::instr_unit_size()); | 1628 cb->set_insts_end(cb->insts_end() - Pipeline::instr_unit_size()); |
1417 | 1629 |
1418 // Save the offset for the listing | 1630 // Save the offset for the listing |
1419 #ifndef PRODUCT | 1631 #ifndef PRODUCT |
1420 if( node_offsets && delay_slot->_idx < node_offset_limit ) | 1632 if (node_offsets && delay_slot->_idx < node_offset_limit) |
1421 node_offsets[delay_slot->_idx] = cb->insts_size(); | 1633 node_offsets[delay_slot->_idx] = cb->insts_size(); |
1422 #endif | 1634 #endif |
1423 | 1635 |
1424 // Support a SafePoint in the delay slot | 1636 // Support a SafePoint in the delay slot |
1425 if( delay_slot->is_MachSafePoint() ) { | 1637 if (delay_slot->is_MachSafePoint()) { |
1426 MachNode *mach = delay_slot->as_Mach(); | 1638 MachNode *mach = delay_slot->as_Mach(); |
1427 // !!!!! Stubs only need an oopmap right now, so bail out | 1639 // !!!!! Stubs only need an oopmap right now, so bail out |
1428 if( !mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL ) { | 1640 if (!mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL) { |
1429 // Write the oopmap directly to the code blob??!! | 1641 // Write the oopmap directly to the code blob??!! |
1430 # ifdef ENABLE_ZAP_DEAD_LOCALS | 1642 # ifdef ENABLE_ZAP_DEAD_LOCALS |
1431 assert( !is_node_getting_a_safepoint(mach), "logic does not match; false positive"); | 1643 assert( !is_node_getting_a_safepoint(mach), "logic does not match; false positive"); |
1432 # endif | 1644 # endif |
1433 delay_slot = NULL; | 1645 delay_slot = NULL; |
1447 // Don't reuse it | 1659 // Don't reuse it |
1448 delay_slot = NULL; | 1660 delay_slot = NULL; |
1449 } | 1661 } |
1450 | 1662 |
1451 } // End for all instructions in block | 1663 } // End for all instructions in block |
1452 | 1664 #ifdef ASSERT |
1453 // If the next block is the top of a loop, pad this block out to align | 1665 // If the next block is the top of a loop, pad this block out to align |
1454 // the loop top a little. Helps prevent pipe stalls at loop back branches. | 1666 // the loop top a little. Helps prevent pipe stalls at loop back branches. |
1455 if( i<_cfg->_num_blocks-1 ) { | 1667 if (i < _cfg->_num_blocks-1) { |
1456 Block *nb = _cfg->_blocks[i+1]; | 1668 Block *nb = _cfg->_blocks[i+1]; |
1457 uint padding = nb->alignment_padding(current_offset); | 1669 uint padding = nb->alignment_padding(current_offset); |
1458 if( padding > 0 ) { | 1670 assert(padding == 0, "alignment should be added already"); |
1459 MachNode *nop = new (this) MachNopNode(padding / nop_size); | 1671 } |
1460 b->_nodes.insert( b->_nodes.size(), nop ); | 1672 #endif |
1461 _cfg->_bbs.map( nop->_idx, b ); | |
1462 nop->emit(*cb, _regalloc); | |
1463 current_offset = cb->insts_size(); | |
1464 } | |
1465 } | |
1466 | |
1467 } // End of for all blocks | 1673 } // End of for all blocks |
1468 | 1674 |
1469 non_safepoints.flush_at_end(); | 1675 non_safepoints.flush_at_end(); |
1470 | 1676 |
1471 // Offset too large? | 1677 // Offset too large? |
1741 NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); ) | 1947 NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); ) |
1742 | 1948 |
1743 // Create a data structure for all the scheduling information | 1949 // Create a data structure for all the scheduling information |
1744 Scheduling scheduling(Thread::current()->resource_area(), *this); | 1950 Scheduling scheduling(Thread::current()->resource_area(), *this); |
1745 | 1951 |
1746 // Initialize the space for the BufferBlob used to find and verify | |
1747 // instruction size in MachNode::emit_size() | |
1748 init_scratch_buffer_blob(MAX_const_size); | |
1749 if (failing()) return; // Out of memory | |
1750 | |
1751 // Walk backwards over each basic block, computing the needed alignment | 1952 // Walk backwards over each basic block, computing the needed alignment |
1752 // Walk over all the basic blocks | 1953 // Walk over all the basic blocks |
1753 scheduling.DoScheduling(); | 1954 scheduling.DoScheduling(); |
1754 } | 1955 } |
1755 | 1956 |
2344 // normally schedule conditional branches (despite them being forced last | 2545 // normally schedule conditional branches (despite them being forced last |
2345 // in the block), because they have delay slots we can fill. Calls all | 2546 // in the block), because they have delay slots we can fill. Calls all |
2346 // have their delay slots filled in the template expansions, so we don't | 2547 // have their delay slots filled in the template expansions, so we don't |
2347 // bother scheduling them. | 2548 // bother scheduling them. |
2348 Node *last = bb->_nodes[_bb_end]; | 2549 Node *last = bb->_nodes[_bb_end]; |
2550 // Ignore trailing NOPs. | |
2551 while (_bb_end > 0 && last->is_Mach() && | |
2552 last->as_Mach()->ideal_Opcode() == Op_Con) { | |
2553 last = bb->_nodes[--_bb_end]; | |
2554 } | |
2555 assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, ""); | |
2349 if( last->is_Catch() || | 2556 if( last->is_Catch() || |
2350 // Exclude unreachable path case when Halt node is in a separate block. | 2557 // Exclude unreachable path case when Halt node is in a separate block. |
2351 (_bb_end > 1 && last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) { | 2558 (_bb_end > 1 && last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) { |
2352 // There must be a prior call. Skip it. | 2559 // There must be a prior call. Skip it. |
2353 while( !bb->_nodes[--_bb_end]->is_MachCall() ) { | 2560 while( !bb->_nodes[--_bb_end]->is_MachCall() ) { |
2676 } | 2883 } |
2677 } else { | 2884 } else { |
2678 // Get DEF'd registers the normal way | 2885 // Get DEF'd registers the normal way |
2679 anti_do_def( b, n, _regalloc->get_reg_first(n), is_def ); | 2886 anti_do_def( b, n, _regalloc->get_reg_first(n), is_def ); |
2680 anti_do_def( b, n, _regalloc->get_reg_second(n), is_def ); | 2887 anti_do_def( b, n, _regalloc->get_reg_second(n), is_def ); |
2888 } | |
2889 | |
2890 // Kill projections on a branch should appear to occur on the | |
2891 // branch, not afterwards, so grab the masks from the projections | |
2892 // and process them. | |
2893 if (n->is_Branch()) { | |
2894 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { | |
2895 Node* use = n->fast_out(i); | |
2896 if (use->is_Proj()) { | |
2897 RegMask rm = use->out_RegMask();// Make local copy | |
2898 while( rm.is_NotEmpty() ) { | |
2899 OptoReg::Name kill = rm.find_first_elem(); | |
2900 rm.Remove(kill); | |
2901 anti_do_def( b, n, kill, false ); | |
2902 } | |
2903 } | |
2904 } | |
2681 } | 2905 } |
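The new block above makes anti-dependence analysis treat a branch's kill projections as taking effect at the branch itself rather than after it; otherwise the scheduler could legally place a use of a killed register between the branch and its projections. The diff's own RegMask idiom, pulled out as a small sketch (DUIterator_Fast, RegMask, and anti_do_def are the types and helper used in this file):

```cpp
// Sketch of the projection walk above: record each projection's kills
// as occurring at the branch node itself.
void add_branch_kills(Block* b, Node* branch) {
  for (DUIterator_Fast imax, i = branch->fast_outs(imax); i < imax; i++) {
    Node* use = branch->fast_out(i);
    if (!use->is_Proj()) continue;
    RegMask rm = use->out_RegMask();          // local copy: emptied below
    while (rm.is_NotEmpty()) {
      OptoReg::Name kill = rm.find_first_elem();
      rm.Remove(kill);
      anti_do_def(b, branch, kill, false);    // the kill happens "at" the branch
    }
  }
}
```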
2682 | 2906 |
2683 // Check each register used by this instruction for a following DEF/KILL | 2907 // Check each register used by this instruction for a following DEF/KILL |
2684 // that must occur afterward and requires an anti-dependence edge. | 2908 // that must occur afterward and requires an anti-dependence edge. |
2685 for( uint j=0; j<n->req(); j++ ) { | 2909 for( uint j=0; j<n->req(); j++ ) { |