comparison src/share/vm/opto/output.cpp @ 3851:95134e034042

7063629: use cbcond in C2 generated code on T4
Summary: Use new short branch instruction in C2 generated code.
Reviewed-by: never
author kvn
date Thu, 11 Aug 2011 12:08:11 -0700
parents c7b60b601eb4
children 11211f7cb5a0
3850:6987871cfb9b 3851:95134e034042
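For context on the change as a whole: cbcond is the compare-and-branch instruction introduced with SPARC T4; it fuses the compare with a branch that has only a short displacement and, as the avoid_back_to_back handling in the hunks below suggests, should not be emitted immediately after another such instruction. To make use of it, this changeset reworks C2's output phase. Block sizing and short-branch substitution now run before instruction scheduling: Output() allocates blk_starts[], calls the new init_buffer() (which in turn calls the reworked shorten_branches()), and only then runs ScheduleAndBundle(). A new finalize_offsets_and_shorten() pass afterwards inserts any remaining padding and retries short-branch replacement, and fill_buffer() must not grow any block beyond its precomputed size. In addition, is_short_branch_offset() now also receives the branch instruction's size, and the new avoid_back_to_back() property drives nop insertion between adjacent short branches. Two stand-alone sketches placed after the shorten_branches and finalize_offsets_and_shorten hunks below illustrate the shortening loop and the padding rule.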
126 126
127 # ifdef ENABLE_ZAP_DEAD_LOCALS 127 # ifdef ENABLE_ZAP_DEAD_LOCALS
128 if ( ZapDeadCompiledLocals ) Insert_zap_nodes(); 128 if ( ZapDeadCompiledLocals ) Insert_zap_nodes();
129 # endif 129 # endif
130 130
131 uint* blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
132 blk_starts[0] = 0;
133
134 // Initialize code buffer and process short branches.
135 CodeBuffer* cb = init_buffer(blk_starts);
136
137 if (cb == NULL || failing()) return;
138
131 ScheduleAndBundle(); 139 ScheduleAndBundle();
132 140
133 #ifndef PRODUCT 141 #ifndef PRODUCT
134 if (trace_opto_output()) { 142 if (trace_opto_output()) {
135 tty->print("\n---- After ScheduleAndBundle ----\n"); 143 tty->print("\n---- After ScheduleAndBundle ----\n");
146 } 154 }
147 #endif 155 #endif
148 156
149 if (failing()) return; 157 if (failing()) return;
150 158
159 finalize_offsets_and_shorten(blk_starts);
160
151 BuildOopMaps(); 161 BuildOopMaps();
152 162
153 if (failing()) return; 163 if (failing()) return;
154 164
155 Fill_buffer(); 165 fill_buffer(cb, blk_starts);
156 } 166 }
157 167
158 bool Compile::need_stack_bang(int frame_size_in_bytes) const { 168 bool Compile::need_stack_bang(int frame_size_in_bytes) const {
159 // Determine if we need to generate a stack overflow check. 169 // Determine if we need to generate a stack overflow check.
160 // Do it if the method is not a stub function and 170 // Do it if the method is not a stub function and
323 } // f( b->head()->is_Loop() ) 333 } // f( b->head()->is_Loop() )
324 } // for( i <= last_block ) 334 } // for( i <= last_block )
325 } // if( MaxLoopPad < OptoLoopAlignment-1 ) 335 } // if( MaxLoopPad < OptoLoopAlignment-1 )
326 } 336 }
327 337
328 //----------------------Shorten_branches--------------------------------------- 338 //----------------------shorten_branches---------------------------------------
329 // The architecture description provides short branch variants for some long 339 // The architecture description provides short branch variants for some long
330 // branch instructions. Replace eligible long branches with short branches. 340 // branch instructions. Replace eligible long branches with short branches.
331 void Compile::Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size) { 341 void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) {
332
333 // fill in the nop array for bundling computations
334 MachNode *_nop_list[Bundle::_nop_count];
335 Bundle::initialize_nops(_nop_list, this);
336 342
337 // ------------------ 343 // ------------------
338 // Compute size of each block, method size, and relocation information size 344 // Compute size of each block, method size, and relocation information size
339 uint *jmp_end = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); 345 uint nblocks = _cfg->_num_blocks;
340 uint *blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1); 346
341 DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); ) 347 uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks);
342 DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); ) 348 uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks);
343 blk_starts[0] = 0; 349 int* jmp_nidx = NEW_RESOURCE_ARRAY(int ,nblocks);
350 DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks); )
351 DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks); )
352
353 bool has_short_branch_candidate = false;
344 354
345 // Initialize the sizes to 0 355 // Initialize the sizes to 0
346 code_size = 0; // Size in bytes of generated code 356 code_size = 0; // Size in bytes of generated code
347 stub_size = 0; // Size in bytes of all stub entries 357 stub_size = 0; // Size in bytes of all stub entries
348 // Size in bytes of all relocation entries, including those in local stubs. 358 // Size in bytes of all relocation entries, including those in local stubs.
349 // Start with 2-bytes of reloc info for the unvalidated entry point 359 // Start with 2-bytes of reloc info for the unvalidated entry point
350 reloc_size = 1; // Number of relocation entries 360 reloc_size = 1; // Number of relocation entries
351 361
352 // Make three passes. The first computes pessimistic blk_starts, 362 // Make three passes. The first computes pessimistic blk_starts,
353 // relative jmp_end and reloc_size information. The second performs 363 // relative jmp_offset and reloc_size information. The second performs
354 // short branch substitution using the pessimistic sizing. The 364 // short branch substitution using the pessimistic sizing. The
355 // third inserts nops where needed. 365 // third inserts nops where needed.
356 366
357 Node *nj; // tmp
358
359 // Step one, perform a pessimistic sizing pass. 367 // Step one, perform a pessimistic sizing pass.
360 uint i; 368 uint last_call_adr = max_uint;
361 uint min_offset_from_last_call = 1; // init to a positive value 369 uint last_avoid_back_to_back_adr = max_uint;
362 uint nop_size = (new (this) MachNopNode())->size(_regalloc); 370 uint nop_size = (new (this) MachNopNode())->size(_regalloc);
363 for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks 371 for (uint i = 0; i < nblocks; i++) { // For all blocks
364 Block *b = _cfg->_blocks[i]; 372 Block *b = _cfg->_blocks[i];
373
374 // During short branch replacement, we store the relative (to blk_starts)
375 // offset of jump in jmp_offset, rather than the absolute offset of jump.
376 // This is so that we do not need to recompute sizes of all nodes when
377 // we compute correct blk_starts in our next sizing pass.
378 jmp_offset[i] = 0;
379 jmp_size[i] = 0;
380 jmp_nidx[i] = -1;
381 DEBUG_ONLY( jmp_target[i] = 0; )
382 DEBUG_ONLY( jmp_rule[i] = 0; )
365 383
366 // Sum all instruction sizes to compute block size 384 // Sum all instruction sizes to compute block size
367 uint last_inst = b->_nodes.size(); 385 uint last_inst = b->_nodes.size();
368 uint blk_size = 0; 386 uint blk_size = 0;
369 for( uint j = 0; j<last_inst; j++ ) { 387 for (uint j = 0; j < last_inst; j++) {
370 nj = b->_nodes[j]; 388 Node* nj = b->_nodes[j];
371 uint inst_size = nj->size(_regalloc); 389 uint inst_size = nj->size(_regalloc);
372 blk_size += inst_size;
373 // Handle machine instruction nodes 390 // Handle machine instruction nodes
374 if( nj->is_Mach() ) { 391 if (nj->is_Mach()) {
375 MachNode *mach = nj->as_Mach(); 392 MachNode *mach = nj->as_Mach();
376 blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding 393 blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding
377 reloc_size += mach->reloc(); 394 reloc_size += mach->reloc();
378 if( mach->is_MachCall() ) { 395 if( mach->is_MachCall() ) {
379 MachCallNode *mcall = mach->as_MachCall(); 396 MachCallNode *mcall = mach->as_MachCall();
386 reloc_size += reloc_java_to_interp(); 403 reloc_size += reloc_java_to_interp();
387 } 404 }
388 } else if (mach->is_MachSafePoint()) { 405 } else if (mach->is_MachSafePoint()) {
389 // If call/safepoint are adjacent, account for possible 406 // If call/safepoint are adjacent, account for possible
390 // nop to disambiguate the two safepoints. 407 // nop to disambiguate the two safepoints.
391 if (min_offset_from_last_call == 0) { 408 // ScheduleAndBundle() can rearrange nodes in a block,
409 // check for all offsets inside this block.
410 if (last_call_adr >= blk_starts[i]) {
392 blk_size += nop_size; 411 blk_size += nop_size;
393 } 412 }
394 } 413 }
395 } 414 if (mach->avoid_back_to_back()) {
396 min_offset_from_last_call += inst_size; 415 // Nop is inserted between "avoid back to back" instructions.
416 // ScheduleAndBundle() can rearrange nodes in a block,
417 // check for all offsets inside this block.
418 if (last_avoid_back_to_back_adr >= blk_starts[i]) {
419 blk_size += nop_size;
420 }
421 }
422 if (mach->may_be_short_branch()) {
423 if (!nj->is_Branch()) {
424 #ifndef PRODUCT
425 nj->dump(3);
426 #endif
427 Unimplemented();
428 }
429 assert(jmp_nidx[i] == -1, "block should have only one branch");
430 jmp_offset[i] = blk_size;
431 jmp_size[i] = inst_size;
432 jmp_nidx[i] = j;
433 has_short_branch_candidate = true;
434 }
435 }
436 blk_size += inst_size;
397 // Remember end of call offset 437 // Remember end of call offset
398 if (nj->is_MachCall() && !nj->is_MachCallLeaf()) { 438 if (nj->is_MachCall() && !nj->is_MachCallLeaf()) {
399 min_offset_from_last_call = 0; 439 last_call_adr = blk_starts[i]+blk_size;
400 } 440 }
401 } 441 // Remember end of avoid_back_to_back offset
402 442 if (nj->is_Mach() && nj->as_Mach()->avoid_back_to_back()) {
403 // During short branch replacement, we store the relative (to blk_starts) 443 last_avoid_back_to_back_adr = blk_starts[i]+blk_size;
404 // end of jump in jmp_end, rather than the absolute end of jump. This 444 }
405 // is so that we do not need to recompute sizes of all nodes when we compute 445 }
406 // correct blk_starts in our next sizing pass.
407 jmp_end[i] = blk_size;
408 DEBUG_ONLY( jmp_target[i] = 0; )
409 446
410 // When the next block starts a loop, we may insert pad NOP 447 // When the next block starts a loop, we may insert pad NOP
411 // instructions. Since we cannot know our future alignment, 448 // instructions. Since we cannot know our future alignment,
412 // assume the worst. 449 // assume the worst.
413 if( i<_cfg->_num_blocks-1 ) { 450 if (i< nblocks-1) {
414 Block *nb = _cfg->_blocks[i+1]; 451 Block *nb = _cfg->_blocks[i+1];
415 int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit(); 452 int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit();
416 if( max_loop_pad > 0 ) { 453 if (max_loop_pad > 0) {
417 assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), ""); 454 assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), "");
418 blk_size += max_loop_pad; 455 blk_size += max_loop_pad;
419 } 456 }
420 } 457 }
421 458
422 // Save block size; update total method size 459 // Save block size; update total method size
423 blk_starts[i+1] = blk_starts[i]+blk_size; 460 blk_starts[i+1] = blk_starts[i]+blk_size;
424 } 461 }
425 462
426 // Step two, replace eligible long jumps. 463 // Step two, replace eligible long jumps.
427 464 bool progress = true;
428 // Note: this will only get the long branches within short branch 465 uint last_may_be_short_branch_adr = max_uint;
429 // range. Another pass might detect more branches that became 466 while (has_short_branch_candidate && progress) {
430 // candidates because the shortening in the first pass exposed 467 progress = false;
431 // more opportunities. Unfortunately, this would require 468 has_short_branch_candidate = false;
432 // recomputing the starting and ending positions for the blocks 469 int adjust_block_start = 0;
433 for( i=0; i<_cfg->_num_blocks; i++ ) { 470 for (uint i = 0; i < nblocks; i++) {
434 Block *b = _cfg->_blocks[i]; 471 Block *b = _cfg->_blocks[i];
435 472 int idx = jmp_nidx[i];
436 int j; 473 MachNode* mach = (idx == -1) ? NULL: b->_nodes[idx]->as_Mach();
437 // Find the branch; ignore trailing NOPs. 474 if (mach != NULL && mach->may_be_short_branch()) {
438 for( j = b->_nodes.size()-1; j>=0; j-- ) { 475 #ifdef ASSERT
439 nj = b->_nodes[j]; 476 assert(jmp_size[i] > 0 && mach->is_Branch(), "sanity");
440 if( !nj->is_Mach() || nj->as_Mach()->ideal_Opcode() != Op_Con ) 477 int j;
441 break; 478 // Find the branch; ignore trailing NOPs.
442 } 479 for (j = b->_nodes.size()-1; j>=0; j--) {
443 480 Node* n = b->_nodes[j];
444 if (j >= 0) { 481 if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con)
445 if( nj->is_Mach() && nj->as_Mach()->may_be_short_branch() ) { 482 break;
446 MachNode *mach = nj->as_Mach(); 483 }
484 assert(j >= 0 && j == idx && b->_nodes[j] == (Node*)mach, "sanity");
485 #endif
486 int br_size = jmp_size[i];
487 int br_offs = blk_starts[i] + jmp_offset[i];
488
447 // This requires the TRUE branch target be in succs[0] 489 // This requires the TRUE branch target be in succs[0]
448 uint bnum = b->non_connector_successor(0)->_pre_order; 490 uint bnum = b->non_connector_successor(0)->_pre_order;
449 uintptr_t target = blk_starts[bnum]; 491 int offset = blk_starts[bnum] - br_offs;
450 if( mach->is_Branch() ) { 492 if (bnum > i) { // adjust following block's offset
451 int offset = target-(blk_starts[i] + jmp_end[i]); 493 offset -= adjust_block_start;
452 if (_matcher->is_short_branch_offset(mach->rule(), offset)) { 494 }
453 // We've got a winner. Replace this branch. 495 // In the following code a nop could be inserted before
454 MachNode* replacement = mach->short_branch_version(this); 496 // the branch which will increase the backward distance.
455 b->_nodes.map(j, replacement); 497 bool needs_padding = ((uint)br_offs == last_may_be_short_branch_adr);
456 mach->subsume_by(replacement); 498 if (needs_padding && offset <= 0)
457 499 offset -= nop_size;
458 // Update the jmp_end size to save time in our 500
459 // next pass. 501 if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) {
460 jmp_end[i] -= (mach->size(_regalloc) - replacement->size(_regalloc)); 502 // We've got a winner. Replace this branch.
461 DEBUG_ONLY( jmp_target[i] = bnum; ); 503 MachNode* replacement = mach->short_branch_version(this);
462 DEBUG_ONLY( jmp_rule[i] = mach->rule(); ); 504
505 // Update the jmp_size.
506 int new_size = replacement->size(_regalloc);
507 int diff = br_size - new_size;
508 assert(diff >= (int)nop_size, "short_branch size should be smaller");
509 // Conservatively take into account padding between
510 // avoid_back_to_back branches. Previous branch could be
511 // converted into avoid_back_to_back branch during next
512 // rounds.
513 if (needs_padding && replacement->avoid_back_to_back()) {
514 jmp_offset[i] += nop_size;
515 diff -= nop_size;
463 } 516 }
517 adjust_block_start += diff;
518 b->_nodes.map(idx, replacement);
519 mach->subsume_by(replacement);
520 mach = replacement;
521 progress = true;
522
523 jmp_size[i] = new_size;
524 DEBUG_ONLY( jmp_target[i] = bnum; );
525 DEBUG_ONLY( jmp_rule[i] = mach->rule(); );
464 } else { 526 } else {
465 #ifndef PRODUCT 527 // The jump distance is not short, try again during next iteration.
466 mach->dump(3); 528 has_short_branch_candidate = true;
467 #endif
468 Unimplemented();
469 } 529 }
470 } 530 } // (mach->may_be_short_branch())
471 } 531 if (mach != NULL && (mach->may_be_short_branch() ||
472 } 532 mach->avoid_back_to_back())) {
533 last_may_be_short_branch_adr = blk_starts[i] + jmp_offset[i] + jmp_size[i];
534 }
535 blk_starts[i+1] -= adjust_block_start;
536 }
537 }
538
539 #ifdef ASSERT
540 for (uint i = 0; i < nblocks; i++) { // For all blocks
541 if (jmp_target[i] != 0) {
542 int br_size = jmp_size[i];
543 int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]);
544 if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) {
545 tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]);
546 }
547 assert(_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset), "Displacement too large for short jmp");
548 }
549 }
550 #endif
551
552 // Step 3, compute the offsets of all blocks, will be done in finalize_offsets_and_shorten()
553 // after ScheduleAndBundle().
554
555 // ------------------
556 // Compute size for code buffer
557 code_size = blk_starts[nblocks];
558
559 // Relocation records
560 reloc_size += 1; // Relo entry for exception handler
561
562 // Adjust reloc_size to number of record of relocation info
563 // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for
564 // a relocation index.
565 // The CodeBuffer will expand the locs array if this estimate is too low.
566 reloc_size *= 10 / sizeof(relocInfo);
567 }
568
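The replacement loop above (step two) iterates until it reaches a fixpoint, because every branch it shrinks moves later blocks closer and can expose new candidates; note also the conservative offset -= nop_size applied when a nop might still be inserted in front of a backward branch. The following stand-alone program illustrates the same idea in miniature; the Blk type, the 8-byte long and 4-byte short encodings and the +/-256-byte reach are invented for the example and are not HotSpot's types or limits.

// Stand-alone illustration of the iterative shortening loop (step two above).
// Everything here is invented for the example; HotSpot's real pass works on
// Block/MachNode and asks the Matcher whether a rule has a short variant.
#include <cstdio>
#include <vector>

struct Blk { int size; int target; int br_size; };  // br_size: 8 = long, 4 = short, 0 = none

int main() {
  // Three blocks; blocks 0 and 2 end in long branches, block 1 has none.
  std::vector<Blk> blocks = { {40, 2, 8}, {200, -1, 0}, {16, 0, 8} };
  std::vector<int> start(blocks.size() + 1, 0);

  // Pessimistic sizing pass: every branch keeps its long encoding.
  for (size_t i = 0; i < blocks.size(); i++)
    start[i + 1] = start[i] + blocks[i].size;

  const int short_reach = 256;  // pretend the short form reaches +/-256 bytes
  bool progress = true;
  while (progress) {            // repeat until no further branch can be shortened
    progress = false;
    int shrink = 0;             // bytes removed so far during this sweep
    for (size_t i = 0; i < blocks.size(); i++) {
      Blk& b = blocks[i];
      if (b.target >= 0 && b.br_size == 8) {
        int br_end = start[i] + b.size;              // branch sits at the end of its block
        int offset = start[b.target] - br_end;
        if ((size_t)b.target > i) offset -= shrink;  // forward targets move up as we shrink
        if (offset > -short_reach && offset < short_reach) {
          b.size -= 4;                               // long (8 bytes) becomes short (4 bytes)
          b.br_size = 4;
          shrink += 4;
          progress = true;
        }
      }
      start[i + 1] = start[i] + b.size;              // refresh offsets as we go
    }
  }
  for (size_t i = 0; i < blocks.size(); i++)
    printf("block %zu starts at %d, branch size %d\n", i, start[i], blocks[i].br_size);
  return 0;
}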
569 //----------------------finalize_offsets_and_shorten-------------------------
570 void Compile::finalize_offsets_and_shorten(uint* blk_starts) {
571 // blk_starts[] contains offsets calculated during short branches processing,
572 // offsets should not be increased during following steps.
473 573
474 // Compute the size of first NumberOfLoopInstrToAlign instructions at head 574 // Compute the size of first NumberOfLoopInstrToAlign instructions at head
475 // of a loop. It is used to determine the padding for loop alignment. 575 // of a loop. It is used to determine the padding for loop alignment.
476 compute_loop_first_inst_sizes(); 576 compute_loop_first_inst_sizes();
477 577
478 // Step 3, compute the offsets of all the labels 578 uint nblocks = _cfg->_num_blocks;
579 #ifdef ASSERT
580 uint* jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks);
581 uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks);
582 uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks);
583 uint* jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks);
584 #endif
585
586 // Inserts nops where needed and do final short branches replacement.
587 uint nop_size = (new (this) MachNopNode())->size(_regalloc);
479 uint last_call_adr = max_uint; 588 uint last_call_adr = max_uint;
480 for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks 589 uint last_avoid_back_to_back_adr = max_uint;
481 // copy the offset of the beginning to the corresponding label 590
482 assert(labels[i].is_unused(), "cannot patch at this point"); 591 assert(blk_starts[0] == 0, "sanity");
483 labels[i].bind_loc(blk_starts[i], CodeBuffer::SECT_INSTS); 592 uint current_offset = 0;
484 593 uint block_alignment_padding = 0;
485 // insert padding for any instructions that need it 594
595 for (uint i=0; i < nblocks; i++) { // For all blocks
486 Block *b = _cfg->_blocks[i]; 596 Block *b = _cfg->_blocks[i];
597
598 #ifdef ASSERT
599 jmp_target[i] = 0;
600 jmp_offset[i] = 0;
601 jmp_size[i] = 0;
602 jmp_rule[i] = 0;
603 #endif
604
605 // Maximum alignment was added before loop block during
606 // Step One, as result padding for nodes was not added.
607 // Take this into account for block's size change check
608 // and allow increase block's size by the difference
609 // of maximum and actual alignment paddings.
610 DEBUG_ONLY( uint orig_blk_size = blk_starts[i+1] - blk_starts[i] + block_alignment_padding; )
611 uint blk_offset = current_offset;
612
487 uint last_inst = b->_nodes.size(); 613 uint last_inst = b->_nodes.size();
488 uint adr = blk_starts[i]; 614 for (uint j = 0; j<last_inst; j++) {
489 for( uint j = 0; j<last_inst; j++ ) { 615 Node* nj = b->_nodes[j];
490 nj = b->_nodes[j]; 616
491 if( nj->is_Mach() ) { 617 if (valid_bundle_info(nj) &&
492 int padding = nj->as_Mach()->compute_padding(adr); 618 node_bundling(nj)->used_in_unconditional_delay()) {
619 continue; // Skip instruction in delay slot
620 }
621
622 uint inst_size = nj->size(_regalloc);
623 if (nj->is_Mach()) {
624 MachNode *mach = nj->as_Mach();
625 int padding = mach->compute_padding(current_offset);
626
493 // If call/safepoint are adjacent insert a nop (5010568) 627 // If call/safepoint are adjacent insert a nop (5010568)
494 if (padding == 0 && nj->is_MachSafePoint() && !nj->is_MachCall() && 628 if (padding == 0 && nj->is_MachSafePoint() && !nj->is_MachCall() &&
495 adr == last_call_adr ) { 629 current_offset == last_call_adr) {
496 padding = nop_size; 630 padding = nop_size;
497 } 631 }
498 if(padding > 0) { 632
633 // Inserted a nop between "avoid back to back" instructions.
634 if (padding == 0 && mach->avoid_back_to_back() &&
635 current_offset == last_avoid_back_to_back_adr) {
636 padding = nop_size;
637 }
638
639 if (padding > 0) {
499 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); 640 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
500 int nops_cnt = padding / nop_size; 641 int nops_cnt = padding / nop_size;
501 MachNode *nop = new (this) MachNopNode(nops_cnt); 642 MachNode *nop = new (this) MachNopNode(nops_cnt);
502 b->_nodes.insert(j++, nop); 643 b->_nodes.insert(j++, nop);
503 _cfg->_bbs.map( nop->_idx, b ); 644 _cfg->_bbs.map(nop->_idx, b);
504 adr += padding;
505 last_inst++; 645 last_inst++;
646 current_offset += padding;
506 } 647 }
507 } 648
508 adr += nj->size(_regalloc); 649 // Try to replace long branch if delay slot is not used,
650 // it is mostly for back branches since forward branch's
651 // distance is not updated yet.
652 bool delay_slot_is_used = valid_bundle_info(nj) &&
653 node_bundling(nj)->use_unconditional_delay();
654 if (!delay_slot_is_used && mach->may_be_short_branch()) {
655 int br_size = inst_size;
656
657 // This requires the TRUE branch target be in succs[0]
658 uint bnum = b->non_connector_successor(0)->_pre_order;
659 int offset = blk_starts[bnum] - current_offset;
660 if (bnum >= i) {
661 // Current and following block's offsets are not
662 // finalized yet, adjust distance.
663 offset -= (blk_starts[i] - blk_offset);
664 }
665 // In the following code a nop could be inserted before
666 // the branch which will increase the backward distance.
667 bool needs_padding = (current_offset == last_avoid_back_to_back_adr);
668 if (needs_padding && offset <= 0)
669 offset -= nop_size;
670
671 if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) {
672 // We've got a winner. Replace this branch.
673 MachNode* replacement = mach->short_branch_version(this);
674
675 // Update the jmp_size.
676 int new_size = replacement->size(_regalloc);
677 assert((br_size - new_size) >= (int)nop_size, "short_branch size should be smaller");
678 // Conservatively take into account padding between
679 // avoid_back_to_back branches. Previous branch could be
680 // converted into avoid_back_to_back branch during next
681 // rounds.
682 if (needs_padding && replacement->avoid_back_to_back()) {
683 MachNode *nop = new (this) MachNopNode();
684 b->_nodes.insert(j++, nop);
685 _cfg->_bbs.map(nop->_idx, b);
686 last_inst++;
687 current_offset += nop_size;
688 }
689 inst_size = new_size;
690 b->_nodes.map(j, replacement);
691 mach->subsume_by(replacement);
692 nj = replacement;
693 #ifdef ASSERT
694 jmp_target[i] = bnum;
695 jmp_offset[i] = current_offset - blk_offset;
696 jmp_size[i] = new_size;
697 jmp_rule[i] = mach->rule();
698 #endif
699 }
700 }
701 }
702 current_offset += inst_size;
509 703
510 // Remember end of call offset 704 // Remember end of call offset
511 if (nj->is_MachCall() && !nj->is_MachCallLeaf()) { 705 if (nj->is_MachCall() && !nj->is_MachCallLeaf()) {
512 last_call_adr = adr; 706 last_call_adr = current_offset;
513 } 707 }
514 } 708 // Remember end of avoid_back_to_back offset
515 709 if (nj->is_Mach() && nj->as_Mach()->avoid_back_to_back()) {
516 if ( i != _cfg->_num_blocks-1) { 710 last_avoid_back_to_back_adr = current_offset;
517 // Get the size of the block 711 }
518 uint blk_size = adr - blk_starts[i]; 712 }
519 713 assert(blk_offset <= blk_starts[i], "shouldn't increase distance");
520 // When the next block is the top of a loop, we may insert pad NOP 714 blk_starts[i] = blk_offset;
521 // instructions. 715
716 // When the next block is the top of a loop, we may insert pad NOP
717 // instructions.
718 if (i < nblocks-1) {
522 Block *nb = _cfg->_blocks[i+1]; 719 Block *nb = _cfg->_blocks[i+1];
523 int current_offset = blk_starts[i] + blk_size; 720 int padding = nb->alignment_padding(current_offset);
524 current_offset += nb->alignment_padding(current_offset); 721 if (padding > 0) {
525 // Save block size; update total method size 722 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
526 blk_starts[i+1] = current_offset; 723 int nops_cnt = padding / nop_size;
527 } 724 MachNode *nop = new (this) MachNopNode(nops_cnt);
528 } 725 b->_nodes.insert(b->_nodes.size(), nop);
726 _cfg->_bbs.map(nop->_idx, b);
727 current_offset += padding;
728 }
729 int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit();
730 assert(max_loop_pad >= padding, "sanity");
731 block_alignment_padding = max_loop_pad - padding;
732 }
733 assert(orig_blk_size >= (current_offset - blk_offset), "shouldn't increase block size");
734 }
735 blk_starts[nblocks] = current_offset;
529 736
530 #ifdef ASSERT 737 #ifdef ASSERT
531 for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks 738 for (uint i = 0; i < nblocks; i++) { // For all blocks
532 if( jmp_target[i] != 0 ) { 739 if (jmp_target[i] != 0) {
533 int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_end[i]); 740 int br_size = jmp_size[i];
534 if (!_matcher->is_short_branch_offset(jmp_rule[i], offset)) { 741 int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]);
535 tty->print_cr("target (%d) - jmp_end(%d) = offset (%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_end[i], offset, i, jmp_target[i]); 742 if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) {
536 } 743 tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]);
537 assert(_matcher->is_short_branch_offset(jmp_rule[i], offset), "Displacement too large for short jmp"); 744 }
538 } 745 assert(_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset), "Displacement too large for short jmp");
539 } 746 }
540 #endif 747 }
541 748 #endif
542 // ------------------
543 // Compute size for code buffer
544 code_size = blk_starts[i-1] + jmp_end[i-1];
545
546 // Relocation records
547 reloc_size += 1; // Relo entry for exception handler
548
549 // Adjust reloc_size to number of record of relocation info
550 // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for
551 // a relocation index.
552 // The CodeBuffer will expand the locs array if this estimate is too low.
553 reloc_size *= 10 / sizeof(relocInfo);
554 } 749 }
555 750
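A note on the pass above: the blk_starts[] computed during shorten_branches() are upper bounds, so finalize_offsets_and_shorten() may only shrink them (hence the "shouldn't increase distance" and "shouldn't increase block size" asserts); the padding it does add, a nop between call and safepoint, before an alignment boundary, or between two avoid_back_to_back instructions, was already accounted for pessimistically during sizing. The short stand-alone sketch below shows just the avoid_back_to_back rule; the Inst type and the 4-byte nop are invented for the example and are not HotSpot code.

// Invented illustration of the avoid_back_to_back padding rule used above:
// remember where the last such instruction ended, and emit a nop if the next
// one would start at exactly that offset.
#include <cstdio>
#include <vector>

struct Inst { int size; bool avoid_back_to_back; };

int main() {
  std::vector<Inst> insts = { {4, true}, {4, true}, {4, false} };
  const int nop_size = 4;
  int offset = 0;
  int last_abtb_end = -1;   // end offset of the last avoid-back-to-back instruction
  for (const Inst& in : insts) {
    if (in.avoid_back_to_back && offset == last_abtb_end) {
      printf("%4d: nop (padding)\n", offset);
      offset += nop_size;   // separate the two instructions
    }
    printf("%4d: inst (size %d)\n", offset, in.size);
    offset += in.size;
    if (in.avoid_back_to_back)
      last_abtb_end = offset;
  }
  return 0;
}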
556 //------------------------------FillLocArray----------------------------------- 751 //------------------------------FillLocArray-----------------------------------
557 // Create a bit of debug info and append it to the array. The mapping is from 752 // Create a bit of debug info and append it to the array. The mapping is from
558 // Java local or expression stack to constant, register or stack-slot. For 753 // Java local or expression stack to constant, register or stack-slot. For
1024 debug_info->end_non_safepoint(pc_offset); 1219 debug_info->end_non_safepoint(pc_offset);
1025 } 1220 }
1026 1221
1027 1222
1028 1223
1029 // helper for Fill_buffer bailout logic 1224 // helper for fill_buffer bailout logic
1030 static void turn_off_compiler(Compile* C) { 1225 static void turn_off_compiler(Compile* C) {
1031 if (CodeCache::largest_free_block() >= CodeCacheMinimumFreeSpace*10) { 1226 if (CodeCache::largest_free_block() >= CodeCacheMinimumFreeSpace*10) {
1032 // Do not turn off compilation if a single giant method has 1227 // Do not turn off compilation if a single giant method has
1033 // blown the code cache size. 1228 // blown the code cache size.
1034 C->record_failure("excessive request to CodeCache"); 1229 C->record_failure("excessive request to CodeCache");
1037 C->record_failure("CodeCache is full"); 1232 C->record_failure("CodeCache is full");
1038 } 1233 }
1039 } 1234 }
1040 1235
1041 1236
1042 //------------------------------Fill_buffer------------------------------------ 1237 //------------------------------init_buffer------------------------------------
1043 void Compile::Fill_buffer() { 1238 CodeBuffer* Compile::init_buffer(uint* blk_starts) {
1044 1239
1045 // Set the initially allocated size 1240 // Set the initially allocated size
1046 int code_req = initial_code_capacity; 1241 int code_req = initial_code_capacity;
1047 int locs_req = initial_locs_capacity; 1242 int locs_req = initial_locs_capacity;
1048 int stub_req = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity; 1243 int stub_req = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity;
1049 int const_req = initial_const_capacity; 1244 int const_req = initial_const_capacity;
1050 bool labels_not_set = true;
1051 1245
1052 int pad_req = NativeCall::instruction_size; 1246 int pad_req = NativeCall::instruction_size;
1053 // The extra spacing after the code is necessary on some platforms. 1247 // The extra spacing after the code is necessary on some platforms.
1054 // Sometimes we need to patch in a jump after the last instruction, 1248 // Sometimes we need to patch in a jump after the last instruction,
1055 // if the nmethod has been deoptimized. (See 4932387, 4894843.) 1249 // if the nmethod has been deoptimized. (See 4932387, 4894843.)
1056 1250
1057 uint i;
1058 // Compute the byte offset where we can store the deopt pc. 1251 // Compute the byte offset where we can store the deopt pc.
1059 if (fixed_slots() != 0) { 1252 if (fixed_slots() != 0) {
1060 _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot)); 1253 _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot));
1061 } 1254 }
1062 1255
1076 // get rid of this hack and have SpillCopy generate stfspill/ldffill 1269 // get rid of this hack and have SpillCopy generate stfspill/ldffill
1077 // instead of stfd/stfs/ldfd/ldfs. 1270 // instead of stfd/stfs/ldfd/ldfs.
1078 _frame_slots += 8*(16/BytesPerInt); 1271 _frame_slots += 8*(16/BytesPerInt);
1079 } 1272 }
1080 #endif 1273 #endif
1081 assert( _frame_slots >= 0 && _frame_slots < 1000000, "sanity check" ); 1274 assert(_frame_slots >= 0 && _frame_slots < 1000000, "sanity check");
1082
1083 // Create an array of unused labels, one for each basic block
1084 Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1);
1085
1086 for( i=0; i <= _cfg->_num_blocks; i++ ) {
1087 blk_labels[i].init();
1088 }
1089 1275
1090 if (has_mach_constant_base_node()) { 1276 if (has_mach_constant_base_node()) {
1091 // Fill the constant table. 1277 // Fill the constant table.
1092 // Note: This must happen before Shorten_branches. 1278 // Note: This must happen before shorten_branches.
1093 for (i = 0; i < _cfg->_num_blocks; i++) { 1279 for (uint i = 0; i < _cfg->_num_blocks; i++) {
1094 Block* b = _cfg->_blocks[i]; 1280 Block* b = _cfg->_blocks[i];
1095 1281
1096 for (uint j = 0; j < b->_nodes.size(); j++) { 1282 for (uint j = 0; j < b->_nodes.size(); j++) {
1097 Node* n = b->_nodes[j]; 1283 Node* n = b->_nodes[j];
1098 1284
1112 } 1298 }
1113 1299
1114 // Initialize the space for the BufferBlob used to find and verify 1300 // Initialize the space for the BufferBlob used to find and verify
1115 // instruction size in MachNode::emit_size() 1301 // instruction size in MachNode::emit_size()
1116 init_scratch_buffer_blob(const_req); 1302 init_scratch_buffer_blob(const_req);
1117 if (failing()) return; // Out of memory 1303 if (failing()) return NULL; // Out of memory
1118 1304
1119 // If this machine supports different size branch offsets, then pre-compute 1305 // Pre-compute the length of blocks and replace
1120 // the length of the blocks 1306 // long branches with short if machine supports it.
1121 if( _matcher->is_short_branch_offset(-1, 0) ) { 1307 shorten_branches(blk_starts, code_req, locs_req, stub_req);
1122 Shorten_branches(blk_labels, code_req, locs_req, stub_req);
1123 labels_not_set = false;
1124 }
1125 1308
1126 // nmethod and CodeBuffer count stubs & constants as part of method's code. 1309 // nmethod and CodeBuffer count stubs & constants as part of method's code.
1127 int exception_handler_req = size_exception_handler(); 1310 int exception_handler_req = size_exception_handler();
1128 int deopt_handler_req = size_deopt_handler(); 1311 int deopt_handler_req = size_deopt_handler();
1129 exception_handler_req += MAX_stubs_size; // add marginal slop for handler 1312 exception_handler_req += MAX_stubs_size; // add marginal slop for handler
1149 cb->initialize(total_req, locs_req); 1332 cb->initialize(total_req, locs_req);
1150 1333
1151 // Have we run out of code space? 1334 // Have we run out of code space?
1152 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { 1335 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
1153 turn_off_compiler(this); 1336 turn_off_compiler(this);
1154 return; 1337 return NULL;
1155 } 1338 }
1156 // Configure the code buffer. 1339 // Configure the code buffer.
1157 cb->initialize_consts_size(const_req); 1340 cb->initialize_consts_size(const_req);
1158 cb->initialize_stubs_size(stub_req); 1341 cb->initialize_stubs_size(stub_req);
1159 cb->initialize_oop_recorder(env()->oop_recorder()); 1342 cb->initialize_oop_recorder(env()->oop_recorder());
1160 1343
1161 // fill in the nop array for bundling computations 1344 // fill in the nop array for bundling computations
1162 MachNode *_nop_list[Bundle::_nop_count]; 1345 MachNode *_nop_list[Bundle::_nop_count];
1163 Bundle::initialize_nops(_nop_list, this); 1346 Bundle::initialize_nops(_nop_list, this);
1164 1347
1348 return cb;
1349 }
1350
1351 //------------------------------fill_buffer------------------------------------
1352 void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
1353
1165 // Create oopmap set. 1354 // Create oopmap set.
1166 _oop_map_set = new OopMapSet(); 1355 _oop_map_set = new OopMapSet();
1167 1356
1168 // !!!!! This preserves old handling of oopmaps for now 1357 // !!!!! This preserves old handling of oopmaps for now
1169 debug_info()->set_oopmaps(_oop_map_set); 1358 debug_info()->set_oopmaps(_oop_map_set);
1178 uint return_offset = 0; 1367 uint return_offset = 0;
1179 int nop_size = (new (this) MachNopNode())->size(_regalloc); 1368 int nop_size = (new (this) MachNopNode())->size(_regalloc);
1180 1369
1181 int previous_offset = 0; 1370 int previous_offset = 0;
1182 int current_offset = 0; 1371 int current_offset = 0;
1372 #ifdef ASSERT
1183 int last_call_offset = -1; 1373 int last_call_offset = -1;
1184 1374 int last_avoid_back_to_back_offset = -1;
1375 #endif
1185 // Create an array of unused labels, one for each basic block, if printing is enabled 1376 // Create an array of unused labels, one for each basic block, if printing is enabled
1186 #ifndef PRODUCT 1377 #ifndef PRODUCT
1187 int *node_offsets = NULL; 1378 int *node_offsets = NULL;
1188 uint node_offset_limit = unique(); 1379 uint node_offset_limit = unique();
1189 1380
1190 1381 if (print_assembly())
1191 if ( print_assembly() )
1192 node_offsets = NEW_RESOURCE_ARRAY(int, node_offset_limit); 1382 node_offsets = NEW_RESOURCE_ARRAY(int, node_offset_limit);
1193 #endif 1383 #endif
1194 1384
1195 NonSafepointEmitter non_safepoints(this); // emit non-safepoints lazily 1385 NonSafepointEmitter non_safepoints(this); // emit non-safepoints lazily
1196 1386
1197 // Emit the constant table. 1387 // Emit the constant table.
1198 if (has_mach_constant_base_node()) { 1388 if (has_mach_constant_base_node()) {
1199 constant_table().emit(*cb); 1389 constant_table().emit(*cb);
1200 } 1390 }
1201 1391
1392 // Create an array of labels, one for each basic block
1393 Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1);
1394 for (uint i=0; i <= _cfg->_num_blocks; i++) {
1395 blk_labels[i].init();
1396 }
1397
1202 // ------------------ 1398 // ------------------
1203 // Now fill in the code buffer 1399 // Now fill in the code buffer
1204 Node *delay_slot = NULL; 1400 Node *delay_slot = NULL;
1205 1401
1206 for( i=0; i < _cfg->_num_blocks; i++ ) { 1402 for (uint i=0; i < _cfg->_num_blocks; i++) {
1403 guarantee(blk_starts[i] == (uint)cb->insts_size(),"should not change size");
1404
1207 Block *b = _cfg->_blocks[i]; 1405 Block *b = _cfg->_blocks[i];
1208 1406
1209 Node *head = b->head(); 1407 Node *head = b->head();
1210 1408
1211 // If this block needs to start aligned (i.e, can be reached other 1409 // If this block needs to start aligned (i.e, can be reached other
1212 // than by falling-thru from the previous block), then force the 1410 // than by falling-thru from the previous block), then force the
1213 // start of a new bundle. 1411 // start of a new bundle.
1214 if( Pipeline::requires_bundling() && starts_bundle(head) ) 1412 if (Pipeline::requires_bundling() && starts_bundle(head))
1215 cb->flush_bundle(true); 1413 cb->flush_bundle(true);
1216 1414
1415 #ifdef ASSERT
1416 if (!b->is_connector()) {
1417 stringStream st;
1418 b->dump_head(&_cfg->_bbs, &st);
1419 MacroAssembler(cb).block_comment(st.as_string());
1420 }
1421 #endif
1422
1217 // Define the label at the beginning of the basic block 1423 // Define the label at the beginning of the basic block
1218 if (labels_not_set) { 1424 MacroAssembler(cb).bind(blk_labels[b->_pre_order]);
1219 MacroAssembler(cb).bind(blk_labels[b->_pre_order]);
1220 } else {
1221 assert(blk_labels[b->_pre_order].loc_pos() == cb->insts_size(),
1222 err_msg("label position does not match code offset: %d != %d",
1223 blk_labels[b->_pre_order].loc_pos(), cb->insts_size()));
1224 }
1225 1425
1226 uint last_inst = b->_nodes.size(); 1426 uint last_inst = b->_nodes.size();
1227 1427
1228 // Emit block normally, except for last instruction. 1428 // Emit block normally, except for last instruction.
1229 // Emit means "dump code bits into code buffer". 1429 // Emit means "dump code bits into code buffer".
1230 for( uint j = 0; j<last_inst; j++ ) { 1430 for (uint j = 0; j<last_inst; j++) {
1231 1431
1232 // Get the node 1432 // Get the node
1233 Node* n = b->_nodes[j]; 1433 Node* n = b->_nodes[j];
1234 1434
1235 // See if delay slots are supported 1435 // See if delay slots are supported
1242 continue; 1442 continue;
1243 } 1443 }
1244 1444
1245 // If this starts a new instruction group, then flush the current one 1445 // If this starts a new instruction group, then flush the current one
1246 // (but allow split bundles) 1446 // (but allow split bundles)
1247 if( Pipeline::requires_bundling() && starts_bundle(n) ) 1447 if (Pipeline::requires_bundling() && starts_bundle(n))
1248 cb->flush_bundle(false); 1448 cb->flush_bundle(false);
1249 1449
1250 // The following logic is duplicated in the code ifdeffed for 1450 // The following logic is duplicated in the code ifdeffed for
1251 // ENABLE_ZAP_DEAD_LOCALS which appears above in this file. It 1451 // ENABLE_ZAP_DEAD_LOCALS which appears above in this file. It
1252 // should be factored out. Or maybe dispersed to the nodes? 1452 // should be factored out. Or maybe dispersed to the nodes?
1253 1453
1254 // Special handling for SafePoint/Call Nodes 1454 // Special handling for SafePoint/Call Nodes
1255 bool is_mcall = false; 1455 bool is_mcall = false;
1256 if( n->is_Mach() ) { 1456 if (n->is_Mach()) {
1257 MachNode *mach = n->as_Mach(); 1457 MachNode *mach = n->as_Mach();
1258 is_mcall = n->is_MachCall(); 1458 is_mcall = n->is_MachCall();
1259 bool is_sfn = n->is_MachSafePoint(); 1459 bool is_sfn = n->is_MachSafePoint();
1260 1460
1261 // If this requires all previous instructions be flushed, then do so 1461 // If this requires all previous instructions be flushed, then do so
1262 if( is_sfn || is_mcall || mach->alignment_required() != 1) { 1462 if (is_sfn || is_mcall || mach->alignment_required() != 1) {
1263 cb->flush_bundle(true); 1463 cb->flush_bundle(true);
1264 current_offset = cb->insts_size(); 1464 current_offset = cb->insts_size();
1265 } 1465 }
1466
1467 #ifdef ASSERT
1468 // A padding may be needed again since a previous instruction
1469 // could be moved to delay slot.
1266 1470
1267 // align the instruction if necessary 1471 // align the instruction if necessary
1268 int padding = mach->compute_padding(current_offset); 1472 int padding = mach->compute_padding(current_offset);
1269 // Make sure safepoint node for polling is distinct from a call's 1473 // Make sure safepoint node for polling is distinct from a call's
1270 // return by adding a nop if needed. 1474 // return by adding a nop if needed.
1271 if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset ) { 1475 if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset) {
1272 padding = nop_size; 1476 padding = nop_size;
1273 } 1477 }
1274 assert( labels_not_set || padding == 0, "instruction should already be aligned"); 1478 if (padding == 0 && mach->avoid_back_to_back() &&
1275 1479 current_offset == last_avoid_back_to_back_offset) {
1276 if(padding > 0) { 1480 // Avoid back to back some instructions.
1277 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); 1481 padding = nop_size;
1278 int nops_cnt = padding / nop_size;
1279 MachNode *nop = new (this) MachNopNode(nops_cnt);
1280 b->_nodes.insert(j++, nop);
1281 last_inst++;
1282 _cfg->_bbs.map( nop->_idx, b );
1283 nop->emit(*cb, _regalloc);
1284 cb->flush_bundle(true);
1285 current_offset = cb->insts_size();
1286 } 1482 }
1287 1483 assert(padding == 0, "padding should be added already");
1484 #endif
1288 // Remember the start of the last call in a basic block 1485 // Remember the start of the last call in a basic block
1289 if (is_mcall) { 1486 if (is_mcall) {
1290 MachCallNode *mcall = mach->as_MachCall(); 1487 MachCallNode *mcall = mach->as_MachCall();
1291 1488
1292 // This destination address is NOT PC-relative 1489 // This destination address is NOT PC-relative
1300 is_sfn = false; 1497 is_sfn = false;
1301 } 1498 }
1302 } 1499 }
1303 1500
1304 // sfn will be valid whenever mcall is valid now because of inheritance 1501 // sfn will be valid whenever mcall is valid now because of inheritance
1305 if( is_sfn || is_mcall ) { 1502 if (is_sfn || is_mcall) {
1306 1503
1307 // Handle special safepoint nodes for synchronization 1504 // Handle special safepoint nodes for synchronization
1308 if( !is_mcall ) { 1505 if (!is_mcall) {
1309 MachSafePointNode *sfn = mach->as_MachSafePoint(); 1506 MachSafePointNode *sfn = mach->as_MachSafePoint();
1310 // !!!!! Stubs only need an oopmap right now, so bail out 1507 // !!!!! Stubs only need an oopmap right now, so bail out
1311 if( sfn->jvms()->method() == NULL) { 1508 if (sfn->jvms()->method() == NULL) {
1312 // Write the oopmap directly to the code blob??!! 1509 // Write the oopmap directly to the code blob??!!
1313 # ifdef ENABLE_ZAP_DEAD_LOCALS 1510 # ifdef ENABLE_ZAP_DEAD_LOCALS
1314 assert( !is_node_getting_a_safepoint(sfn), "logic does not match; false positive"); 1511 assert( !is_node_getting_a_safepoint(sfn), "logic does not match; false positive");
1315 # endif 1512 # endif
1316 continue; 1513 continue;
1326 else if( mach->is_MachNullCheck() ) { 1523 else if( mach->is_MachNullCheck() ) {
1327 inct_starts[inct_cnt++] = previous_offset; 1524 inct_starts[inct_cnt++] = previous_offset;
1328 } 1525 }
1329 1526
1330 // If this is a branch, then fill in the label with the target BB's label 1527 // If this is a branch, then fill in the label with the target BB's label
1331 else if ( mach->is_Branch() ) { 1528 else if (mach->is_Branch()) {
1332 1529
1333 if ( mach->ideal_Opcode() == Op_Jump ) { 1530 if (mach->ideal_Opcode() == Op_Jump) {
1334 for (uint h = 0; h < b->_num_succs; h++ ) { 1531 for (uint h = 0; h < b->_num_succs; h++) {
1335 Block* succs_block = b->_succs[h]; 1532 Block* succs_block = b->_succs[h];
1336 for (uint j = 1; j < succs_block->num_preds(); j++) { 1533 for (uint j = 1; j < succs_block->num_preds(); j++) {
1337 Node* jpn = succs_block->pred(j); 1534 Node* jpn = succs_block->pred(j);
1338 if ( jpn->is_JumpProj() && jpn->in(0) == mach ) { 1535 if (jpn->is_JumpProj() && jpn->in(0) == mach) {
1339 uint block_num = succs_block->non_connector()->_pre_order; 1536 uint block_num = succs_block->non_connector()->_pre_order;
1340 Label *blkLabel = &blk_labels[block_num]; 1537 Label *blkLabel = &blk_labels[block_num];
1341 mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel); 1538 mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel);
1342 } 1539 }
1343 } 1540 }
1350 } 1547 }
1351 } 1548 }
1352 1549
1353 #ifdef ASSERT 1550 #ifdef ASSERT
1354 // Check that oop-store precedes the card-mark 1551 // Check that oop-store precedes the card-mark
1355 else if( mach->ideal_Opcode() == Op_StoreCM ) { 1552 else if (mach->ideal_Opcode() == Op_StoreCM) {
1356 uint storeCM_idx = j; 1553 uint storeCM_idx = j;
1357 int count = 0; 1554 int count = 0;
1358 for (uint prec = mach->req(); prec < mach->len(); prec++) { 1555 for (uint prec = mach->req(); prec < mach->len(); prec++) {
1359 Node *oop_store = mach->in(prec); // Precedence edge 1556 Node *oop_store = mach->in(prec); // Precedence edge
1360 if (oop_store == NULL) continue; 1557 if (oop_store == NULL) continue;
1369 } 1566 }
1370 assert(count > 0, "storeCM expects at least one precedence edge"); 1567 assert(count > 0, "storeCM expects at least one precedence edge");
1371 } 1568 }
1372 #endif 1569 #endif
1373 1570
1374 else if( !n->is_Proj() ) { 1571 else if (!n->is_Proj()) {
1375 // Remember the beginning of the previous instruction, in case 1572 // Remember the beginning of the previous instruction, in case
1376 // it's followed by a flag-kill and a null-check. Happens on 1573 // it's followed by a flag-kill and a null-check. Happens on
1377 // Intel all the time, with add-to-memory kind of opcodes. 1574 // Intel all the time, with add-to-memory kind of opcodes.
1378 previous_offset = current_offset; 1575 previous_offset = current_offset;
1379 } 1576 }
1386 return; 1583 return;
1387 } 1584 }
1388 1585
1389 // Save the offset for the listing 1586 // Save the offset for the listing
1390 #ifndef PRODUCT 1587 #ifndef PRODUCT
1391 if( node_offsets && n->_idx < node_offset_limit ) 1588 if (node_offsets && n->_idx < node_offset_limit)
1392 node_offsets[n->_idx] = cb->insts_size(); 1589 node_offsets[n->_idx] = cb->insts_size();
1393 #endif 1590 #endif
1394 1591
1395 // "Normal" instruction case 1592 // "Normal" instruction case
1593 DEBUG_ONLY( uint instr_offset = cb->insts_size(); )
1396 n->emit(*cb, _regalloc); 1594 n->emit(*cb, _regalloc);
1397 current_offset = cb->insts_size(); 1595 current_offset = cb->insts_size();
1596
1597 #ifdef ASSERT
1598 if (n->size(_regalloc) != (current_offset-instr_offset)) {
1599 n->dump();
1600 assert(n->size(_regalloc) == (current_offset-instr_offset), "wrong size of mach node");
1601 }
1602 #endif
1398 non_safepoints.observe_instruction(n, current_offset); 1603 non_safepoints.observe_instruction(n, current_offset);
1399 1604
1605 #ifdef ASSERT
1400 // mcall is last "call" that can be a safepoint 1606 // mcall is last "call" that can be a safepoint
1401 // record it so we can see if a poll will directly follow it 1607 // record it so we can see if a poll will directly follow it
1402 // in which case we'll need a pad to make the PcDesc sites unique 1608 // in which case we'll need a pad to make the PcDesc sites unique
1403 // see 5010568. This can be slightly inaccurate but conservative 1609 // see 5010568. This can be slightly inaccurate but conservative
1404 // in the case that return address is not actually at current_offset. 1610 // in the case that return address is not actually at current_offset.
1406 1612
1407 if (is_mcall) { 1613 if (is_mcall) {
1408 last_call_offset = current_offset; 1614 last_call_offset = current_offset;
1409 } 1615 }
1410 1616
1617 if (n->is_Mach() && n->as_Mach()->avoid_back_to_back()) {
1618 // Avoid back to back some instructions.
1619 last_avoid_back_to_back_offset = current_offset;
1620 }
1621 #endif
1622
1411 // See if this instruction has a delay slot 1623 // See if this instruction has a delay slot
1412 if ( valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) { 1624 if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
1413 assert(delay_slot != NULL, "expecting delay slot node"); 1625 assert(delay_slot != NULL, "expecting delay slot node");
1414 1626
1415 // Back up 1 instruction 1627 // Back up 1 instruction
1416 cb->set_insts_end(cb->insts_end() - Pipeline::instr_unit_size()); 1628 cb->set_insts_end(cb->insts_end() - Pipeline::instr_unit_size());
1417 1629
1418 // Save the offset for the listing 1630 // Save the offset for the listing
1419 #ifndef PRODUCT 1631 #ifndef PRODUCT
1420 if( node_offsets && delay_slot->_idx < node_offset_limit ) 1632 if (node_offsets && delay_slot->_idx < node_offset_limit)
1421 node_offsets[delay_slot->_idx] = cb->insts_size(); 1633 node_offsets[delay_slot->_idx] = cb->insts_size();
1422 #endif 1634 #endif
1423 1635
1424 // Support a SafePoint in the delay slot 1636 // Support a SafePoint in the delay slot
1425 if( delay_slot->is_MachSafePoint() ) { 1637 if (delay_slot->is_MachSafePoint()) {
1426 MachNode *mach = delay_slot->as_Mach(); 1638 MachNode *mach = delay_slot->as_Mach();
1427 // !!!!! Stubs only need an oopmap right now, so bail out 1639 // !!!!! Stubs only need an oopmap right now, so bail out
1428 if( !mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL ) { 1640 if (!mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL) {
1429 // Write the oopmap directly to the code blob??!! 1641 // Write the oopmap directly to the code blob??!!
1430 # ifdef ENABLE_ZAP_DEAD_LOCALS 1642 # ifdef ENABLE_ZAP_DEAD_LOCALS
1431 assert( !is_node_getting_a_safepoint(mach), "logic does not match; false positive"); 1643 assert( !is_node_getting_a_safepoint(mach), "logic does not match; false positive");
1432 # endif 1644 # endif
1433 delay_slot = NULL; 1645 delay_slot = NULL;
1447 // Don't reuse it 1659 // Don't reuse it
1448 delay_slot = NULL; 1660 delay_slot = NULL;
1449 } 1661 }
1450 1662
1451 } // End for all instructions in block 1663 } // End for all instructions in block
1452 1664 #ifdef ASSERT
1453 // If the next block is the top of a loop, pad this block out to align 1665 // If the next block is the top of a loop, pad this block out to align
1454 // the loop top a little. Helps prevent pipe stalls at loop back branches. 1666 // the loop top a little. Helps prevent pipe stalls at loop back branches.
1455 if( i<_cfg->_num_blocks-1 ) { 1667 if (i < _cfg->_num_blocks-1) {
1456 Block *nb = _cfg->_blocks[i+1]; 1668 Block *nb = _cfg->_blocks[i+1];
1457 uint padding = nb->alignment_padding(current_offset); 1669 uint padding = nb->alignment_padding(current_offset);
1458 if( padding > 0 ) { 1670 assert(padding == 0, "alignment should be added already");
1459 MachNode *nop = new (this) MachNopNode(padding / nop_size); 1671 }
1460 b->_nodes.insert( b->_nodes.size(), nop ); 1672 #endif
1461 _cfg->_bbs.map( nop->_idx, b );
1462 nop->emit(*cb, _regalloc);
1463 current_offset = cb->insts_size();
1464 }
1465 }
1466
1467 } // End of for all blocks 1673 } // End of for all blocks
1468 1674
1469 non_safepoints.flush_at_end(); 1675 non_safepoints.flush_at_end();
1470 1676
1471 // Offset too large? 1677 // Offset too large?
1741 NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); ) 1947 NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )
1742 1948
1743 // Create a data structure for all the scheduling information 1949 // Create a data structure for all the scheduling information
1744 Scheduling scheduling(Thread::current()->resource_area(), *this); 1950 Scheduling scheduling(Thread::current()->resource_area(), *this);
1745 1951
1746 // Initialize the space for the BufferBlob used to find and verify
1747 // instruction size in MachNode::emit_size()
1748 init_scratch_buffer_blob(MAX_const_size);
1749 if (failing()) return; // Out of memory
1750
1751 // Walk backwards over each basic block, computing the needed alignment 1952 // Walk backwards over each basic block, computing the needed alignment
1752 // Walk over all the basic blocks 1953 // Walk over all the basic blocks
1753 scheduling.DoScheduling(); 1954 scheduling.DoScheduling();
1754 } 1955 }
1755 1956
2344 // normally schedule conditional branches (despite them being forced last 2545 // normally schedule conditional branches (despite them being forced last
2345 // in the block), because they have delay slots we can fill. Calls all 2546 // in the block), because they have delay slots we can fill. Calls all
2346 // have their delay slots filled in the template expansions, so we don't 2547 // have their delay slots filled in the template expansions, so we don't
2347 // bother scheduling them. 2548 // bother scheduling them.
2348 Node *last = bb->_nodes[_bb_end]; 2549 Node *last = bb->_nodes[_bb_end];
2550 // Ignore trailing NOPs.
2551 while (_bb_end > 0 && last->is_Mach() &&
2552 last->as_Mach()->ideal_Opcode() == Op_Con) {
2553 last = bb->_nodes[--_bb_end];
2554 }
2555 assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, "");
2349 if( last->is_Catch() || 2556 if( last->is_Catch() ||
2350 // Exclude unreachable path case when Halt node is in a separate block. 2557 // Exclude unreachable path case when Halt node is in a separate block.
2351 (_bb_end > 1 && last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) { 2558 (_bb_end > 1 && last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
2352 // There must be a prior call. Skip it. 2559 // There must be a prior call. Skip it.
2353 while( !bb->_nodes[--_bb_end]->is_MachCall() ) { 2560 while( !bb->_nodes[--_bb_end]->is_MachCall() ) {
2676 } 2883 }
2677 } else { 2884 } else {
2678 // Get DEF'd registers the normal way 2885 // Get DEF'd registers the normal way
2679 anti_do_def( b, n, _regalloc->get_reg_first(n), is_def ); 2886 anti_do_def( b, n, _regalloc->get_reg_first(n), is_def );
2680 anti_do_def( b, n, _regalloc->get_reg_second(n), is_def ); 2887 anti_do_def( b, n, _regalloc->get_reg_second(n), is_def );
2888 }
2889
2890 // Kill projections on a branch should appear to occur on the
2891 // branch, not afterwards, so grab the masks from the projections
2892 // and process them.
2893 if (n->is_Branch()) {
2894 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
2895 Node* use = n->fast_out(i);
2896 if (use->is_Proj()) {
2897 RegMask rm = use->out_RegMask();// Make local copy
2898 while( rm.is_NotEmpty() ) {
2899 OptoReg::Name kill = rm.find_first_elem();
2900 rm.Remove(kill);
2901 anti_do_def( b, n, kill, false );
2902 }
2903 }
2904 }
2681 } 2905 }
2682 2906
2683 // Check each register used by this instruction for a following DEF/KILL 2907 // Check each register used by this instruction for a following DEF/KILL
2684 // that must occur afterward and requires an anti-dependence edge. 2908 // that must occur afterward and requires an anti-dependence edge.
2685 for( uint j=0; j<n->req(); j++ ) { 2909 for( uint j=0; j<n->req(); j++ ) {