comparison src/share/vm/opto/output.cpp @ 4137:04b9a2566eec

Merge with hsx23/hotspot.
author Thomas Wuerthinger <thomas.wuerthinger@oracle.com>
date Sat, 17 Dec 2011 21:40:27 +0100
parents 0e8a2a629afb 739a9abbbd4b
children 597bc897257d
comparison
3737:9dc19b7d89a3 4137:04b9a2566eec
126 126
127 # ifdef ENABLE_ZAP_DEAD_LOCALS 127 # ifdef ENABLE_ZAP_DEAD_LOCALS
128 if ( ZapDeadCompiledLocals ) Insert_zap_nodes(); 128 if ( ZapDeadCompiledLocals ) Insert_zap_nodes();
129 # endif 129 # endif
130 130
131 uint* blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
132 blk_starts[0] = 0;
133
134 // Initialize code buffer and process short branches.
135 CodeBuffer* cb = init_buffer(blk_starts);
136
137 if (cb == NULL || failing()) return;
138
131 ScheduleAndBundle(); 139 ScheduleAndBundle();
132 140
133 #ifndef PRODUCT 141 #ifndef PRODUCT
134 if (trace_opto_output()) { 142 if (trace_opto_output()) {
135 tty->print("\n---- After ScheduleAndBundle ----\n"); 143 tty->print("\n---- After ScheduleAndBundle ----\n");
150 158
151 BuildOopMaps(); 159 BuildOopMaps();
152 160
153 if (failing()) return; 161 if (failing()) return;
154 162
155 Fill_buffer(); 163 fill_buffer(cb, blk_starts);
156 } 164 }
157 165
158 bool Compile::need_stack_bang(int frame_size_in_bytes) const { 166 bool Compile::need_stack_bang(int frame_size_in_bytes) const {
159 // Determine if we need to generate a stack overflow check. 167 // Determine if we need to generate a stack overflow check.
160 // Do it if the method is not a stub function and 168 // Do it if the method is not a stub function and
323 } // if( b->head()->is_Loop() ) 331 } // if( b->head()->is_Loop() )
324 } // for( i <= last_block ) 332 } // for( i <= last_block )
325 } // if( MaxLoopPad < OptoLoopAlignment-1 ) 333 } // if( MaxLoopPad < OptoLoopAlignment-1 )
326 } 334 }
327 335
328 //----------------------Shorten_branches--------------------------------------- 336 //----------------------shorten_branches---------------------------------------
329 // The architecture description provides short branch variants for some long 337 // The architecture description provides short branch variants for some long
330 // branch instructions. Replace eligible long branches with short branches. 338 // branch instructions. Replace eligible long branches with short branches.
331 void Compile::Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size) { 339 void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) {
332
333 // fill in the nop array for bundling computations
334 MachNode *_nop_list[Bundle::_nop_count];
335 Bundle::initialize_nops(_nop_list, this);
336 340
337 // ------------------ 341 // ------------------
338 // Compute size of each block, method size, and relocation information size 342 // Compute size of each block, method size, and relocation information size
339 uint *jmp_end = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); 343 uint nblocks = _cfg->_num_blocks;
340 uint *blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1); 344
341 DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); ) 345 uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks);
342 DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); ) 346 uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks);
343 blk_starts[0] = 0; 347 int* jmp_nidx = NEW_RESOURCE_ARRAY(int ,nblocks);
348 DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks); )
349 DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks); )
350
351 bool has_short_branch_candidate = false;
344 352
345 // Initialize the sizes to 0 353 // Initialize the sizes to 0
346 code_size = 0; // Size in bytes of generated code 354 code_size = 0; // Size in bytes of generated code
347 stub_size = 0; // Size in bytes of all stub entries 355 stub_size = 0; // Size in bytes of all stub entries
348 // Size in bytes of all relocation entries, including those in local stubs. 356 // Size in bytes of all relocation entries, including those in local stubs.
349 // Start with 2-bytes of reloc info for the unvalidated entry point 357 // Start with 2-bytes of reloc info for the unvalidated entry point
350 reloc_size = 1; // Number of relocation entries 358 reloc_size = 1; // Number of relocation entries
351 359
352 // Make three passes. The first computes pessimistic blk_starts, 360 // Make three passes. The first computes pessimistic blk_starts,
353 // relative jmp_end and reloc_size information. The second performs 361 // relative jmp_offset and reloc_size information. The second performs
354 // short branch substitution using the pessimistic sizing. The 362 // short branch substitution using the pessimistic sizing. The
355 // third inserts nops where needed. 363 // third inserts nops where needed.
356 364
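The three passes described above are essentially a fixed-point relaxation: size everything pessimistically first, then keep demoting long branches to their short forms while the shrinking code brings more targets into short-branch range. The following is a minimal standalone sketch of that idea, not part of this changeset; SimpleBlock, LONG_BR, SHORT_BR and SHORT_REACH are made-up stand-ins rather than HotSpot types, and the sketch ignores the alignment and avoid_back_to_back padding the real pass has to budget for.

#include <cstdlib>
#include <vector>

// Simplified model: each block is a run of ordinary instructions followed by
// exactly one branch whose target is another block.
struct SimpleBlock {
  int body_size;    // bytes of non-branch instructions
  int branch_size;  // current branch encoding size, long by default
  int target;       // index of the branch target block
};

static const int LONG_BR = 5, SHORT_BR = 2, SHORT_REACH = 127;

// Analogue of blk_starts[]: start offset of every block, plus the total size.
static std::vector<int> starts_of(const std::vector<SimpleBlock>& blocks) {
  std::vector<int> starts(blocks.size() + 1, 0);
  for (size_t i = 0; i < blocks.size(); i++)
    starts[i + 1] = starts[i] + blocks[i].body_size + blocks[i].branch_size;
  return starts;
}

// Analogue of the substitution pass: replace a long branch whenever its
// displacement already fits the short encoding. Shrinking one branch moves
// later blocks closer together, which can expose new candidates, so iterate
// until no further progress is made.
static void shorten(std::vector<SimpleBlock>& blocks) {
  bool progress = true;
  while (progress) {
    progress = false;
    std::vector<int> starts = starts_of(blocks);
    for (size_t i = 0; i < blocks.size(); i++) {
      SimpleBlock& b = blocks[i];
      if (b.branch_size == SHORT_BR) continue;   // already short
      int branch_at = starts[i] + b.body_size;   // offset of the branch itself
      int disp = starts[b.target] - branch_at;   // conservative displacement
      if (std::abs(disp) <= SHORT_REACH) {
        b.branch_size = SHORT_BR;                // demote to the short form
        progress = true;                         // sizes changed, re-check all
      }
    }
  }
}

In the changeset itself the same loop lives in shorten_branches(), with the extra wrinkle that jmp_offset[] is kept relative to blk_starts[] so that node sizes never need to be recomputed between iterations.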
357 Node *nj; // tmp
358
359 // Step one, perform a pessimistic sizing pass. 365 // Step one, perform a pessimistic sizing pass.
360 uint i; 366 uint last_call_adr = max_uint;
361 uint min_offset_from_last_call = 1; // init to a positive value 367 uint last_avoid_back_to_back_adr = max_uint;
362 uint nop_size = (new (this) MachNopNode())->size(_regalloc); 368 uint nop_size = (new (this) MachNopNode())->size(_regalloc);
363 for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks 369 for (uint i = 0; i < nblocks; i++) { // For all blocks
364 Block *b = _cfg->_blocks[i]; 370 Block *b = _cfg->_blocks[i];
371
372 // During short branch replacement, we store the relative (to blk_starts)
373 // offset of jump in jmp_offset, rather than the absolute offset of jump.
374 // This is so that we do not need to recompute sizes of all nodes when
375 // we compute correct blk_starts in our next sizing pass.
376 jmp_offset[i] = 0;
377 jmp_size[i] = 0;
378 jmp_nidx[i] = -1;
379 DEBUG_ONLY( jmp_target[i] = 0; )
380 DEBUG_ONLY( jmp_rule[i] = 0; )
365 381
366 // Sum all instruction sizes to compute block size 382 // Sum all instruction sizes to compute block size
367 uint last_inst = b->_nodes.size(); 383 uint last_inst = b->_nodes.size();
368 uint blk_size = 0; 384 uint blk_size = 0;
369 for( uint j = 0; j<last_inst; j++ ) { 385 for (uint j = 0; j < last_inst; j++) {
370 nj = b->_nodes[j]; 386 Node* nj = b->_nodes[j];
371 uint inst_size = nj->size(_regalloc);
372 blk_size += inst_size;
373 // Handle machine instruction nodes 387 // Handle machine instruction nodes
374 if( nj->is_Mach() ) { 388 if (nj->is_Mach()) {
375 MachNode *mach = nj->as_Mach(); 389 MachNode *mach = nj->as_Mach();
376 blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding 390 blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding
377 reloc_size += mach->reloc(); 391 reloc_size += mach->reloc();
378 if( mach->is_MachCall() ) { 392 if( mach->is_MachCall() ) {
379 MachCallNode *mcall = mach->as_MachCall(); 393 MachCallNode *mcall = mach->as_MachCall();
386 reloc_size += reloc_java_to_interp(); 400 reloc_size += reloc_java_to_interp();
387 } 401 }
388 } else if (mach->is_MachSafePoint()) { 402 } else if (mach->is_MachSafePoint()) {
389 // If call/safepoint are adjacent, account for possible 403 // If call/safepoint are adjacent, account for possible
390 // nop to disambiguate the two safepoints. 404 // nop to disambiguate the two safepoints.
391 if (min_offset_from_last_call == 0) { 405 // ScheduleAndBundle() can rearrange nodes in a block,
406 // check for all offsets inside this block.
407 if (last_call_adr >= blk_starts[i]) {
392 blk_size += nop_size; 408 blk_size += nop_size;
393 } 409 }
394 } 410 }
395 } 411 if (mach->avoid_back_to_back()) {
396 min_offset_from_last_call += inst_size; 412 // Nop is inserted between "avoid back to back" instructions.
413 // ScheduleAndBundle() can rearrange nodes in a block,
414 // check for all offsets inside this block.
415 if (last_avoid_back_to_back_adr >= blk_starts[i]) {
416 blk_size += nop_size;
417 }
418 }
419 if (mach->may_be_short_branch()) {
420 if (!nj->is_MachBranch()) {
421 #ifndef PRODUCT
422 nj->dump(3);
423 #endif
424 Unimplemented();
425 }
426 assert(jmp_nidx[i] == -1, "block should have only one branch");
427 jmp_offset[i] = blk_size;
428 jmp_size[i] = nj->size(_regalloc);
429 jmp_nidx[i] = j;
430 has_short_branch_candidate = true;
431 }
432 }
433 blk_size += nj->size(_regalloc);
397 // Remember end of call offset 434 // Remember end of call offset
398 if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) { 435 if (nj->is_MachCall() && !nj->is_MachCallLeaf()) {
399 min_offset_from_last_call = 0; 436 last_call_adr = blk_starts[i]+blk_size;
400 } 437 }
401 } 438 // Remember end of avoid_back_to_back offset
402 439 if (nj->is_Mach() && nj->as_Mach()->avoid_back_to_back()) {
403 // During short branch replacement, we store the relative (to blk_starts) 440 last_avoid_back_to_back_adr = blk_starts[i]+blk_size;
404 // end of jump in jmp_end, rather than the absolute end of jump. This 441 }
405 // is so that we do not need to recompute sizes of all nodes when we compute 442 }
406 // correct blk_starts in our next sizing pass.
407 jmp_end[i] = blk_size;
408 DEBUG_ONLY( jmp_target[i] = 0; )
409 443
410 // When the next block starts a loop, we may insert pad NOP 444 // When the next block starts a loop, we may insert pad NOP
411 // instructions. Since we cannot know our future alignment, 445 // instructions. Since we cannot know our future alignment,
412 // assume the worst. 446 // assume the worst.
413 if( i<_cfg->_num_blocks-1 ) { 447 if (i< nblocks-1) {
414 Block *nb = _cfg->_blocks[i+1]; 448 Block *nb = _cfg->_blocks[i+1];
415 int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit(); 449 int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit();
416 if( max_loop_pad > 0 ) { 450 if (max_loop_pad > 0) {
417 assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), ""); 451 assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), "");
418 blk_size += max_loop_pad; 452 blk_size += max_loop_pad;
419 } 453 }
420 } 454 }
421 455
422 // Save block size; update total method size 456 // Save block size; update total method size
423 blk_starts[i+1] = blk_starts[i]+blk_size; 457 blk_starts[i+1] = blk_starts[i]+blk_size;
424 } 458 }
425 459
426 // Step two, replace eligible long jumps. 460 // Step two, replace eligible long jumps.
427 461 bool progress = true;
428 // Note: this will only get the long branches within short branch 462 uint last_may_be_short_branch_adr = max_uint;
429 // range. Another pass might detect more branches that became 463 while (has_short_branch_candidate && progress) {
430 // candidates because the shortening in the first pass exposed 464 progress = false;
431 // more opportunities. Unfortunately, this would require 465 has_short_branch_candidate = false;
432 // recomputing the starting and ending positions for the blocks 466 int adjust_block_start = 0;
433 for( i=0; i<_cfg->_num_blocks; i++ ) { 467 for (uint i = 0; i < nblocks; i++) {
434 Block *b = _cfg->_blocks[i]; 468 Block *b = _cfg->_blocks[i];
435 469 int idx = jmp_nidx[i];
436 int j; 470 MachNode* mach = (idx == -1) ? NULL: b->_nodes[idx]->as_Mach();
437 // Find the branch; ignore trailing NOPs. 471 if (mach != NULL && mach->may_be_short_branch()) {
438 for( j = b->_nodes.size()-1; j>=0; j-- ) { 472 #ifdef ASSERT
439 nj = b->_nodes[j]; 473 assert(jmp_size[i] > 0 && mach->is_MachBranch(), "sanity");
440 if( !nj->is_Mach() || nj->as_Mach()->ideal_Opcode() != Op_Con ) 474 int j;
441 break; 475 // Find the branch; ignore trailing NOPs.
442 } 476 for (j = b->_nodes.size()-1; j>=0; j--) {
443 477 Node* n = b->_nodes[j];
444 if (j >= 0) { 478 if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con)
445 if( nj->is_Mach() && nj->as_Mach()->may_be_short_branch() ) { 479 break;
446 MachNode *mach = nj->as_Mach(); 480 }
481 assert(j >= 0 && j == idx && b->_nodes[j] == (Node*)mach, "sanity");
482 #endif
483 int br_size = jmp_size[i];
484 int br_offs = blk_starts[i] + jmp_offset[i];
485
447 // This requires the TRUE branch target be in succs[0] 486 // This requires the TRUE branch target be in succs[0]
448 uint bnum = b->non_connector_successor(0)->_pre_order; 487 uint bnum = b->non_connector_successor(0)->_pre_order;
449 uintptr_t target = blk_starts[bnum]; 488 int offset = blk_starts[bnum] - br_offs;
450 if( mach->is_pc_relative() ) { 489 if (bnum > i) { // adjust following block's offset
451 int offset = target-(blk_starts[i] + jmp_end[i]); 490 offset -= adjust_block_start;
452 if (_matcher->is_short_branch_offset(mach->rule(), offset)) { 491 }
453 // We've got a winner. Replace this branch. 492 // In the following code a nop could be inserted before
454 MachNode* replacement = mach->short_branch_version(this); 493 // the branch which will increase the backward distance.
455 b->_nodes.map(j, replacement); 494 bool needs_padding = ((uint)br_offs == last_may_be_short_branch_adr);
456 mach->subsume_by(replacement); 495 if (needs_padding && offset <= 0)
457 496 offset -= nop_size;
458 // Update the jmp_end size to save time in our 497
459 // next pass. 498 if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) {
460 jmp_end[i] -= (mach->size(_regalloc) - replacement->size(_regalloc)); 499 // We've got a winner. Replace this branch.
461 DEBUG_ONLY( jmp_target[i] = bnum; ); 500 MachNode* replacement = mach->as_MachBranch()->short_branch_version(this);
462 DEBUG_ONLY( jmp_rule[i] = mach->rule(); ); 501
502 // Update the jmp_size.
503 int new_size = replacement->size(_regalloc);
504 int diff = br_size - new_size;
505 assert(diff >= (int)nop_size, "short_branch size should be smaller");
506 // Conservatively take into account padding between
507 // avoid_back_to_back branches. Previous branch could be
508 // converted into avoid_back_to_back branch during next
509 // rounds.
510 if (needs_padding && replacement->avoid_back_to_back()) {
511 jmp_offset[i] += nop_size;
512 diff -= nop_size;
463 } 513 }
514 adjust_block_start += diff;
515 b->_nodes.map(idx, replacement);
516 mach->subsume_by(replacement);
517 mach = replacement;
518 progress = true;
519
520 jmp_size[i] = new_size;
521 DEBUG_ONLY( jmp_target[i] = bnum; );
522 DEBUG_ONLY( jmp_rule[i] = mach->rule(); );
464 } else { 523 } else {
465 #ifndef PRODUCT 524 // The jump distance is not short, try again during next iteration.
466 mach->dump(3); 525 has_short_branch_candidate = true;
467 #endif
468 Unimplemented();
469 } 526 }
470 } 527 } // (mach->may_be_short_branch())
471 } 528 if (mach != NULL && (mach->may_be_short_branch() ||
472 } 529 mach->avoid_back_to_back())) {
473 530 last_may_be_short_branch_adr = blk_starts[i] + jmp_offset[i] + jmp_size[i];
474 // Compute the size of first NumberOfLoopInstrToAlign instructions at head 531 }
475 // of a loop. It is used to determine the padding for loop alignment. 532 blk_starts[i+1] -= adjust_block_start;
476 compute_loop_first_inst_sizes();
477
478 // Step 3, compute the offsets of all the labels
479 uint last_call_adr = max_uint;
480 for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
481 // copy the offset of the beginning to the corresponding label
482 assert(labels[i].is_unused(), "cannot patch at this point");
483 labels[i].bind_loc(blk_starts[i], CodeBuffer::SECT_INSTS);
484
485 // insert padding for any instructions that need it
486 Block *b = _cfg->_blocks[i];
487 uint last_inst = b->_nodes.size();
488 uint adr = blk_starts[i];
489 for( uint j = 0; j<last_inst; j++ ) {
490 nj = b->_nodes[j];
491 if( nj->is_Mach() ) {
492 int padding = nj->as_Mach()->compute_padding(adr);
493 // If call/safepoint are adjacent insert a nop (5010568)
494 if (padding == 0 && nj->is_MachSafePoint() && !nj->is_MachCall() &&
495 adr == last_call_adr ) {
496 padding = nop_size;
497 }
498 if(padding > 0) {
499 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
500 int nops_cnt = padding / nop_size;
501 MachNode *nop = new (this) MachNopNode(nops_cnt);
502 b->_nodes.insert(j++, nop);
503 _cfg->_bbs.map( nop->_idx, b );
504 adr += padding;
505 last_inst++;
506 }
507 }
508 adr += nj->size(_regalloc);
509
510 // Remember end of call offset
511 if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) {
512 last_call_adr = adr;
513 }
514 }
515
516 if ( i != _cfg->_num_blocks-1) {
517 // Get the size of the block
518 uint blk_size = adr - blk_starts[i];
519
520 // When the next block is the top of a loop, we may insert pad NOP
521 // instructions.
522 Block *nb = _cfg->_blocks[i+1];
523 int current_offset = blk_starts[i] + blk_size;
524 current_offset += nb->alignment_padding(current_offset);
525 // Save block size; update total method size
526 blk_starts[i+1] = current_offset;
527 } 533 }
528 } 534 }
529 535
530 #ifdef ASSERT 536 #ifdef ASSERT
531 for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks 537 for (uint i = 0; i < nblocks; i++) { // For all blocks
532 if( jmp_target[i] != 0 ) { 538 if (jmp_target[i] != 0) {
533 int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_end[i]); 539 int br_size = jmp_size[i];
534 if (!_matcher->is_short_branch_offset(jmp_rule[i], offset)) { 540 int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]);
535 tty->print_cr("target (%d) - jmp_end(%d) = offset (%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_end[i], offset, i, jmp_target[i]); 541 if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) {
536 } 542 tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]);
537 assert(_matcher->is_short_branch_offset(jmp_rule[i], offset), "Displacement too large for short jmp"); 543 }
538 } 544 assert(_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset), "Displacement too large for short jmp");
539 } 545 }
540 #endif 546 }
547 #endif
548
549 // Step 3: the offsets of all blocks will be computed in fill_buffer()
550 // after ScheduleAndBundle().
541 551
542 // ------------------ 552 // ------------------
543 // Compute size for code buffer 553 // Compute size for code buffer
544 code_size = blk_starts[i-1] + jmp_end[i-1]; 554 code_size = blk_starts[nblocks];
545 555
546 // Relocation records 556 // Relocation records
547 reloc_size += 1; // Relo entry for exception handler 557 reloc_size += 1; // Relo entry for exception handler
548 558
549 // Adjust reloc_size to the number of relocation info records 559 // Adjust reloc_size to the number of relocation info records
550 // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for 560 // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for
551 // a relocation index. 561 // a relocation index.
552 // The CodeBuffer will expand the locs array if this estimate is too low. 562 // The CodeBuffer will expand the locs array if this estimate is too low.
553 reloc_size *= 10 / sizeof(relocInfo); 563 reloc_size *= 10 / sizeof(relocInfo);
554 } 564 }
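For the estimate just above, the integer arithmetic works out as follows, assuming sizeof(relocInfo) is 2 (relocInfo wraps a single 16-bit value); that size is my assumption, not stated in this changeset.

// reloc_size *= 10 / sizeof(relocInfo);
//            == reloc_size * (10 / 2)
//            == reloc_size * 5
// i.e. every counted relocation entry is budgeted as roughly 5 relocInfo
// records (about 10 bytes), which covers the 6-8 byte maximum plus the
// ~25% index tax mentioned in the comment.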
555 565
556 //------------------------------FillLocArray----------------------------------- 566 //------------------------------FillLocArray-----------------------------------
557 // Create a bit of debug info and append it to the array. The mapping is from 567 // Create a bit of debug info and append it to the array. The mapping is from
558 // Java local or expression stack to constant, register or stack-slot. For 568 // Java local or expression stack to constant, register or stack-slot. For
1024 debug_info->end_non_safepoint(pc_offset); 1034 debug_info->end_non_safepoint(pc_offset);
1025 } 1035 }
1026 1036
1027 1037
1028 1038
1029 // helper for Fill_buffer bailout logic 1039 // helper for fill_buffer bailout logic
1030 static void turn_off_compiler(Compile* C) { 1040 static void turn_off_compiler(Compile* C) {
1031 if (CodeCache::largest_free_block() >= CodeCacheMinimumFreeSpace*10) { 1041 if (CodeCache::largest_free_block() >= CodeCacheMinimumFreeSpace*10) {
1032 // Do not turn off compilation if a single giant method has 1042 // Do not turn off compilation if a single giant method has
1033 // blown the code cache size. 1043 // blown the code cache size.
1034 C->record_failure("excessive request to CodeCache"); 1044 C->record_failure("excessive request to CodeCache");
1037 C->record_failure("CodeCache is full"); 1047 C->record_failure("CodeCache is full");
1038 } 1048 }
1039 } 1049 }
1040 1050
1041 1051
1042 //------------------------------Fill_buffer------------------------------------ 1052 //------------------------------init_buffer------------------------------------
1043 void Compile::Fill_buffer() { 1053 CodeBuffer* Compile::init_buffer(uint* blk_starts) {
1044 1054
1045 // Set the initially allocated size 1055 // Set the initially allocated size
1046 int code_req = initial_code_capacity; 1056 int code_req = initial_code_capacity;
1047 int locs_req = initial_locs_capacity; 1057 int locs_req = initial_locs_capacity;
1048 int stub_req = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity; 1058 int stub_req = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity;
1049 int const_req = initial_const_capacity; 1059 int const_req = initial_const_capacity;
1050 bool labels_not_set = true;
1051 1060
1052 int pad_req = NativeCall::instruction_size; 1061 int pad_req = NativeCall::instruction_size;
1053 // The extra spacing after the code is necessary on some platforms. 1062 // The extra spacing after the code is necessary on some platforms.
1054 // Sometimes we need to patch in a jump after the last instruction, 1063 // Sometimes we need to patch in a jump after the last instruction,
1055 // if the nmethod has been deoptimized. (See 4932387, 4894843.) 1064 // if the nmethod has been deoptimized. (See 4932387, 4894843.)
1056 1065
1057 uint i;
1058 // Compute the byte offset where we can store the deopt pc. 1066 // Compute the byte offset where we can store the deopt pc.
1059 if (fixed_slots() != 0) { 1067 if (fixed_slots() != 0) {
1060 _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot)); 1068 _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot));
1061 } 1069 }
1062 1070
1076 // get rid of this hack and have SpillCopy generate stfspill/ldffill 1084 // get rid of this hack and have SpillCopy generate stfspill/ldffill
1077 // instead of stfd/stfs/ldfd/ldfs. 1085 // instead of stfd/stfs/ldfd/ldfs.
1078 _frame_slots += 8*(16/BytesPerInt); 1086 _frame_slots += 8*(16/BytesPerInt);
1079 } 1087 }
1080 #endif 1088 #endif
1081 assert( _frame_slots >= 0 && _frame_slots < 1000000, "sanity check" ); 1089 assert(_frame_slots >= 0 && _frame_slots < 1000000, "sanity check");
1082
1083 // Create an array of unused labels, one for each basic block
1084 Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1);
1085
1086 for( i=0; i <= _cfg->_num_blocks; i++ ) {
1087 blk_labels[i].init();
1088 }
1089 1090
1090 if (has_mach_constant_base_node()) { 1091 if (has_mach_constant_base_node()) {
1091 // Fill the constant table. 1092 // Fill the constant table.
1092 // Note: This must happen before Shorten_branches. 1093 // Note: This must happen before shorten_branches.
1093 for (i = 0; i < _cfg->_num_blocks; i++) { 1094 for (uint i = 0; i < _cfg->_num_blocks; i++) {
1094 Block* b = _cfg->_blocks[i]; 1095 Block* b = _cfg->_blocks[i];
1095 1096
1096 for (uint j = 0; j < b->_nodes.size(); j++) { 1097 for (uint j = 0; j < b->_nodes.size(); j++) {
1097 Node* n = b->_nodes[j]; 1098 Node* n = b->_nodes[j];
1098 1099
1112 } 1113 }
1113 1114
1114 // Initialize the space for the BufferBlob used to find and verify 1115 // Initialize the space for the BufferBlob used to find and verify
1115 // instruction size in MachNode::emit_size() 1116 // instruction size in MachNode::emit_size()
1116 init_scratch_buffer_blob(const_req); 1117 init_scratch_buffer_blob(const_req);
1117 if (failing()) return; // Out of memory 1118 if (failing()) return NULL; // Out of memory
1118 1119
1119 // If this machine supports different size branch offsets, then pre-compute 1120 // Pre-compute the length of blocks and replace
1120 // the length of the blocks 1121 // long branches with short if machine supports it.
1121 if( _matcher->is_short_branch_offset(-1, 0) ) { 1122 shorten_branches(blk_starts, code_req, locs_req, stub_req);
1122 Shorten_branches(blk_labels, code_req, locs_req, stub_req);
1123 labels_not_set = false;
1124 }
1125 1123
1126 // nmethod and CodeBuffer count stubs & constants as part of method's code. 1124 // nmethod and CodeBuffer count stubs & constants as part of method's code.
1127 int exception_handler_req = size_exception_handler(); 1125 int exception_handler_req = size_exception_handler();
1128 int deopt_handler_req = size_deopt_handler(); 1126 int deopt_handler_req = size_deopt_handler();
1129 exception_handler_req += MAX_stubs_size; // add marginal slop for handler 1127 exception_handler_req += MAX_stubs_size; // add marginal slop for handler
1149 cb->initialize(total_req, locs_req); 1147 cb->initialize(total_req, locs_req);
1150 1148
1151 // Have we run out of code space? 1149 // Have we run out of code space?
1152 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { 1150 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
1153 turn_off_compiler(this); 1151 turn_off_compiler(this);
1154 return; 1152 return NULL;
1155 } 1153 }
1156 // Configure the code buffer. 1154 // Configure the code buffer.
1157 cb->initialize_consts_size(const_req); 1155 cb->initialize_consts_size(const_req);
1158 cb->initialize_stubs_size(stub_req); 1156 cb->initialize_stubs_size(stub_req);
1159 cb->initialize_oop_recorder(env()->oop_recorder()); 1157 cb->initialize_oop_recorder(env()->oop_recorder());
1160 1158
1161 // fill in the nop array for bundling computations 1159 // fill in the nop array for bundling computations
1162 MachNode *_nop_list[Bundle::_nop_count]; 1160 MachNode *_nop_list[Bundle::_nop_count];
1163 Bundle::initialize_nops(_nop_list, this); 1161 Bundle::initialize_nops(_nop_list, this);
1164 1162
1163 return cb;
1164 }
1165
1166 //------------------------------fill_buffer------------------------------------
1167 void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
1168 // blk_starts[] contains offsets calculated during short branches processing;
1169 // offsets should not be increased during the following steps.
1170
1171 // Compute the size of first NumberOfLoopInstrToAlign instructions at head
1172 // of a loop. It is used to determine the padding for loop alignment.
1173 compute_loop_first_inst_sizes();
1174
1165 // Create oopmap set. 1175 // Create oopmap set.
1166 _oop_map_set = new OopMapSet(); 1176 _oop_map_set = new OopMapSet();
1167 1177
1168 // !!!!! This preserves old handling of oopmaps for now 1178 // !!!!! This preserves old handling of oopmaps for now
1169 debug_info()->set_oopmaps(_oop_map_set); 1179 debug_info()->set_oopmaps(_oop_map_set);
1170 1180
1181 uint nblocks = _cfg->_num_blocks;
1171 // Count and start of implicit null check instructions 1182 // Count and start of implicit null check instructions
1172 uint inct_cnt = 0; 1183 uint inct_cnt = 0;
1173 uint *inct_starts = NEW_RESOURCE_ARRAY(uint, _cfg->_num_blocks+1); 1184 uint *inct_starts = NEW_RESOURCE_ARRAY(uint, nblocks+1);
1174 1185
1175 // Count and start of calls 1186 // Count and start of calls
1176 uint *call_returns = NEW_RESOURCE_ARRAY(uint, _cfg->_num_blocks+1); 1187 uint *call_returns = NEW_RESOURCE_ARRAY(uint, nblocks+1);
1177 1188
1178 uint return_offset = 0; 1189 uint return_offset = 0;
1179 int nop_size = (new (this) MachNopNode())->size(_regalloc); 1190 int nop_size = (new (this) MachNopNode())->size(_regalloc);
1180 1191
1181 int previous_offset = 0; 1192 int previous_offset = 0;
1182 int current_offset = 0; 1193 int current_offset = 0;
1183 int last_call_offset = -1; 1194 int last_call_offset = -1;
1195 int last_avoid_back_to_back_offset = -1;
1196 #ifdef ASSERT
1197 int block_alignment_padding = 0;
1198
1199 uint* jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks);
1200 uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks);
1201 uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks);
1202 uint* jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks);
1203 #endif
1184 1204
1185 // Create an array of unused labels, one for each basic block, if printing is enabled 1205 // Create an array of unused labels, one for each basic block, if printing is enabled
1186 #ifndef PRODUCT 1206 #ifndef PRODUCT
1187 int *node_offsets = NULL; 1207 int *node_offsets = NULL;
1188 uint node_offset_limit = unique(); 1208 uint node_offset_limit = unique();
1189 1209
1190 1210 if (print_assembly())
1191 if ( print_assembly() )
1192 node_offsets = NEW_RESOURCE_ARRAY(int, node_offset_limit); 1211 node_offsets = NEW_RESOURCE_ARRAY(int, node_offset_limit);
1193 #endif 1212 #endif
1194 1213
1195 NonSafepointEmitter non_safepoints(this); // emit non-safepoints lazily 1214 NonSafepointEmitter non_safepoints(this); // emit non-safepoints lazily
1196 1215
1197 // Emit the constant table. 1216 // Emit the constant table.
1198 if (has_mach_constant_base_node()) { 1217 if (has_mach_constant_base_node()) {
1199 constant_table().emit(*cb); 1218 constant_table().emit(*cb);
1200 } 1219 }
1201 1220
1221 // Create an array of labels, one for each basic block
1222 Label *blk_labels = NEW_RESOURCE_ARRAY(Label, nblocks+1);
1223 for (uint i=0; i <= nblocks; i++) {
1224 blk_labels[i].init();
1225 }
1226
1202 // ------------------ 1227 // ------------------
1203 // Now fill in the code buffer 1228 // Now fill in the code buffer
1204 Node *delay_slot = NULL; 1229 Node *delay_slot = NULL;
1205 1230
1206 for( i=0; i < _cfg->_num_blocks; i++ ) { 1231 for (uint i=0; i < nblocks; i++) {
1232 guarantee(blk_starts[i] >= (uint)cb->insts_size(),"should not increase size");
1233
1207 Block *b = _cfg->_blocks[i]; 1234 Block *b = _cfg->_blocks[i];
1208 1235
1209 Node *head = b->head(); 1236 Node *head = b->head();
1210 1237
1211 // If this block needs to start aligned (i.e, can be reached other 1238 // If this block needs to start aligned (i.e, can be reached other
1212 // than by falling-thru from the previous block), then force the 1239 // than by falling-thru from the previous block), then force the
1213 // start of a new bundle. 1240 // start of a new bundle.
1214 if( Pipeline::requires_bundling() && starts_bundle(head) ) 1241 if (Pipeline::requires_bundling() && starts_bundle(head))
1215 cb->flush_bundle(true); 1242 cb->flush_bundle(true);
1216 1243
1244 #ifdef ASSERT
1245 if (!b->is_connector()) {
1246 stringStream st;
1247 b->dump_head(&_cfg->_bbs, &st);
1248 MacroAssembler(cb).block_comment(st.as_string());
1249 }
1250 jmp_target[i] = 0;
1251 jmp_offset[i] = 0;
1252 jmp_size[i] = 0;
1253 jmp_rule[i] = 0;
1254
1255 // The maximum alignment padding for a loop block was used
1256 // during the first round of branch shortening; as a result,
1257 // padding for nodes (sfpt after call) was not added.
1258 // Take this into account for the block's size change check
1259 // and allow the block's size to grow by the difference
1260 // between the maximum and actual alignment paddings.
1261 int orig_blk_size = blk_starts[i+1] - blk_starts[i] + block_alignment_padding;
1262 #endif
1263 int blk_offset = current_offset;
1264
1217 // Define the label at the beginning of the basic block 1265 // Define the label at the beginning of the basic block
1218 if (labels_not_set) { 1266 MacroAssembler(cb).bind(blk_labels[b->_pre_order]);
1219 MacroAssembler(cb).bind(blk_labels[b->_pre_order]);
1220 } else {
1221 assert(blk_labels[b->_pre_order].loc_pos() == cb->insts_size(),
1222 err_msg("label position does not match code offset: %d != %d",
1223 blk_labels[b->_pre_order].loc_pos(), cb->insts_size()));
1224 }
1225 1267
1226 uint last_inst = b->_nodes.size(); 1268 uint last_inst = b->_nodes.size();
1227 1269
1228 // Emit block normally, except for last instruction. 1270 // Emit block normally, except for last instruction.
1229 // Emit means "dump code bits into code buffer". 1271 // Emit means "dump code bits into code buffer".
1230 for( uint j = 0; j<last_inst; j++ ) { 1272 for (uint j = 0; j<last_inst; j++) {
1231 1273
1232 // Get the node 1274 // Get the node
1233 Node* n = b->_nodes[j]; 1275 Node* n = b->_nodes[j];
1234 1276
1235 // See if delay slots are supported 1277 // See if delay slots are supported
1242 continue; 1284 continue;
1243 } 1285 }
1244 1286
1245 // If this starts a new instruction group, then flush the current one 1287 // If this starts a new instruction group, then flush the current one
1246 // (but allow split bundles) 1288 // (but allow split bundles)
1247 if( Pipeline::requires_bundling() && starts_bundle(n) ) 1289 if (Pipeline::requires_bundling() && starts_bundle(n))
1248 cb->flush_bundle(false); 1290 cb->flush_bundle(false);
1249 1291
1250 // The following logic is duplicated in the code ifdeffed for 1292 // The following logic is duplicated in the code ifdeffed for
1251 // ENABLE_ZAP_DEAD_LOCALS which appears above in this file. It 1293 // ENABLE_ZAP_DEAD_LOCALS which appears above in this file. It
1252 // should be factored out. Or maybe dispersed to the nodes? 1294 // should be factored out. Or maybe dispersed to the nodes?
1253 1295
1254 // Special handling for SafePoint/Call Nodes 1296 // Special handling for SafePoint/Call Nodes
1255 bool is_mcall = false; 1297 bool is_mcall = false;
1256 if( n->is_Mach() ) { 1298 if (n->is_Mach()) {
1257 MachNode *mach = n->as_Mach(); 1299 MachNode *mach = n->as_Mach();
1258 is_mcall = n->is_MachCall(); 1300 is_mcall = n->is_MachCall();
1259 bool is_sfn = n->is_MachSafePoint(); 1301 bool is_sfn = n->is_MachSafePoint();
1260 1302
1261 // If this requires all previous instructions be flushed, then do so 1303 // If this requires all previous instructions be flushed, then do so
1262 if( is_sfn || is_mcall || mach->alignment_required() != 1) { 1304 if (is_sfn || is_mcall || mach->alignment_required() != 1) {
1263 cb->flush_bundle(true); 1305 cb->flush_bundle(true);
1264 current_offset = cb->insts_size(); 1306 current_offset = cb->insts_size();
1265 } 1307 }
1308
1309 // Padding may be needed again since a previous instruction
1310 // could have been moved into the delay slot.
1266 1311
1267 // align the instruction if necessary 1312 // align the instruction if necessary
1268 int padding = mach->compute_padding(current_offset); 1313 int padding = mach->compute_padding(current_offset);
1269 // Make sure safepoint node for polling is distinct from a call's 1314 // Make sure safepoint node for polling is distinct from a call's
1270 // return by adding a nop if needed. 1315 // return by adding a nop if needed.
1271 if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset ) { 1316 if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset) {
1272 padding = nop_size; 1317 padding = nop_size;
1273 } 1318 }
1274 assert( labels_not_set || padding == 0, "instruction should already be aligned"); 1319 if (padding == 0 && mach->avoid_back_to_back() &&
1320 current_offset == last_avoid_back_to_back_offset) {
1321 // Avoid placing some instructions back to back.
1322 padding = nop_size;
1323 }
1275 1324
1276 if(padding > 0) { 1325 if(padding > 0) {
1277 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); 1326 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
1278 int nops_cnt = padding / nop_size; 1327 int nops_cnt = padding / nop_size;
1279 MachNode *nop = new (this) MachNopNode(nops_cnt); 1328 MachNode *nop = new (this) MachNopNode(nops_cnt);
1293 mcall->method_set((intptr_t)mcall->entry_point()); 1342 mcall->method_set((intptr_t)mcall->entry_point());
1294 1343
1295 // Save the return address 1344 // Save the return address
1296 call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset(); 1345 call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset();
1297 1346
1298 if (!mcall->is_safepoint_node()) { 1347 if (mcall->is_MachCallLeaf()) {
1299 is_mcall = false; 1348 is_mcall = false;
1300 is_sfn = false; 1349 is_sfn = false;
1301 } 1350 }
1302 } 1351 }
1303 1352
1304 // sfn will be valid whenever mcall is valid now because of inheritance 1353 // sfn will be valid whenever mcall is valid now because of inheritance
1305 if( is_sfn || is_mcall ) { 1354 if (is_sfn || is_mcall) {
1306 1355
1307 // Handle special safepoint nodes for synchronization 1356 // Handle special safepoint nodes for synchronization
1308 if( !is_mcall ) { 1357 if (!is_mcall) {
1309 MachSafePointNode *sfn = mach->as_MachSafePoint(); 1358 MachSafePointNode *sfn = mach->as_MachSafePoint();
1310 // !!!!! Stubs only need an oopmap right now, so bail out 1359 // !!!!! Stubs only need an oopmap right now, so bail out
1311 if( sfn->jvms()->method() == NULL) { 1360 if (sfn->jvms()->method() == NULL) {
1312 // Write the oopmap directly to the code blob??!! 1361 // Write the oopmap directly to the code blob??!!
1313 # ifdef ENABLE_ZAP_DEAD_LOCALS 1362 # ifdef ENABLE_ZAP_DEAD_LOCALS
1314 assert( !is_node_getting_a_safepoint(sfn), "logic does not match; false positive"); 1363 assert( !is_node_getting_a_safepoint(sfn), "logic does not match; false positive");
1315 # endif 1364 # endif
1316 continue; 1365 continue;
1326 else if( mach->is_MachNullCheck() ) { 1375 else if( mach->is_MachNullCheck() ) {
1327 inct_starts[inct_cnt++] = previous_offset; 1376 inct_starts[inct_cnt++] = previous_offset;
1328 } 1377 }
1329 1378
1330 // If this is a branch, then fill in the label with the target BB's label 1379 // If this is a branch, then fill in the label with the target BB's label
1331 else if ( mach->is_Branch() ) { 1380 else if (mach->is_MachBranch()) {
1332 1381 // This requires the TRUE branch target be in succs[0]
1333 if ( mach->ideal_Opcode() == Op_Jump ) { 1382 uint block_num = b->non_connector_successor(0)->_pre_order;
1334 for (uint h = 0; h < b->_num_succs; h++ ) { 1383
1335 Block* succs_block = b->_succs[h]; 1384 // Try to replace long branch if delay slot is not used,
1336 for (uint j = 1; j < succs_block->num_preds(); j++) { 1385 // it is mostly for back branches since forward branch's
1337 Node* jpn = succs_block->pred(j); 1386 // distance is not updated yet.
1338 if ( jpn->is_JumpProj() && jpn->in(0) == mach ) { 1387 bool delay_slot_is_used = valid_bundle_info(n) &&
1339 uint block_num = succs_block->non_connector()->_pre_order; 1388 node_bundling(n)->use_unconditional_delay();
1340 Label *blkLabel = &blk_labels[block_num]; 1389 if (!delay_slot_is_used && mach->may_be_short_branch()) {
1341 mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel); 1390 assert(delay_slot == NULL, "not expecting delay slot node");
1342 } 1391 int br_size = n->size(_regalloc);
1392 int offset = blk_starts[block_num] - current_offset;
1393 if (block_num >= i) {
1394 // Current and following block's offsets are not
1395 // finalized yet, adjust distance by the difference
1396 // between calculated and final offsets of current block.
1397 offset -= (blk_starts[i] - blk_offset);
1398 }
1399 // In the following code a nop could be inserted before
1400 // the branch which will increase the backward distance.
1401 bool needs_padding = (current_offset == last_avoid_back_to_back_offset);
1402 if (needs_padding && offset <= 0)
1403 offset -= nop_size;
1404
1405 if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) {
1406 // We've got a winner. Replace this branch.
1407 MachNode* replacement = mach->as_MachBranch()->short_branch_version(this);
1408
1409 // Update the jmp_size.
1410 int new_size = replacement->size(_regalloc);
1411 assert((br_size - new_size) >= (int)nop_size, "short_branch size should be smaller");
1412 // Insert padding between avoid_back_to_back branches.
1413 if (needs_padding && replacement->avoid_back_to_back()) {
1414 MachNode *nop = new (this) MachNopNode();
1415 b->_nodes.insert(j++, nop);
1416 _cfg->_bbs.map(nop->_idx, b);
1417 last_inst++;
1418 nop->emit(*cb, _regalloc);
1419 cb->flush_bundle(true);
1420 current_offset = cb->insts_size();
1421 }
1422 #ifdef ASSERT
1423 jmp_target[i] = block_num;
1424 jmp_offset[i] = current_offset - blk_offset;
1425 jmp_size[i] = new_size;
1426 jmp_rule[i] = mach->rule();
1427 #endif
1428 b->_nodes.map(j, replacement);
1429 mach->subsume_by(replacement);
1430 n = replacement;
1431 mach = replacement;
1432 }
1433 }
1434 mach->as_MachBranch()->label_set( &blk_labels[block_num], block_num );
1435 } else if (mach->ideal_Opcode() == Op_Jump) {
1436 for (uint h = 0; h < b->_num_succs; h++) {
1437 Block* succs_block = b->_succs[h];
1438 for (uint j = 1; j < succs_block->num_preds(); j++) {
1439 Node* jpn = succs_block->pred(j);
1440 if (jpn->is_JumpProj() && jpn->in(0) == mach) {
1441 uint block_num = succs_block->non_connector()->_pre_order;
1442 Label *blkLabel = &blk_labels[block_num];
1443 mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel);
1343 } 1444 }
1344 } 1445 }
1345 } else {
1346 // For Branchs
1347 // This requires the TRUE branch target be in succs[0]
1348 uint block_num = b->non_connector_successor(0)->_pre_order;
1349 mach->label_set( blk_labels[block_num], block_num );
1350 } 1446 }
1351 } 1447 }
1352 1448
1353 #ifdef ASSERT 1449 #ifdef ASSERT
1354 // Check that oop-store precedes the card-mark 1450 // Check that oop-store precedes the card-mark
1355 else if( mach->ideal_Opcode() == Op_StoreCM ) { 1451 else if (mach->ideal_Opcode() == Op_StoreCM) {
1356 uint storeCM_idx = j; 1452 uint storeCM_idx = j;
1357 int count = 0; 1453 int count = 0;
1358 for (uint prec = mach->req(); prec < mach->len(); prec++) { 1454 for (uint prec = mach->req(); prec < mach->len(); prec++) {
1359 Node *oop_store = mach->in(prec); // Precedence edge 1455 Node *oop_store = mach->in(prec); // Precedence edge
1360 if (oop_store == NULL) continue; 1456 if (oop_store == NULL) continue;
1369 } 1465 }
1370 assert(count > 0, "storeCM expects at least one precedence edge"); 1466 assert(count > 0, "storeCM expects at least one precedence edge");
1371 } 1467 }
1372 #endif 1468 #endif
1373 1469
1374 else if( !n->is_Proj() ) { 1470 else if (!n->is_Proj()) {
1375 // Remember the beginning of the previous instruction, in case 1471 // Remember the beginning of the previous instruction, in case
1376 // it's followed by a flag-kill and a null-check. Happens on 1472 // it's followed by a flag-kill and a null-check. Happens on
1377 // Intel all the time, with add-to-memory kind of opcodes. 1473 // Intel all the time, with add-to-memory kind of opcodes.
1378 previous_offset = current_offset; 1474 previous_offset = current_offset;
1379 } 1475 }
1386 return; 1482 return;
1387 } 1483 }
1388 1484
1389 // Save the offset for the listing 1485 // Save the offset for the listing
1390 #ifndef PRODUCT 1486 #ifndef PRODUCT
1391 if( node_offsets && n->_idx < node_offset_limit ) 1487 if (node_offsets && n->_idx < node_offset_limit)
1392 node_offsets[n->_idx] = cb->insts_size(); 1488 node_offsets[n->_idx] = cb->insts_size();
1393 #endif 1489 #endif
1394 1490
1395 // "Normal" instruction case 1491 // "Normal" instruction case
1492 DEBUG_ONLY( uint instr_offset = cb->insts_size(); )
1396 n->emit(*cb, _regalloc); 1493 n->emit(*cb, _regalloc);
1397 current_offset = cb->insts_size(); 1494 current_offset = cb->insts_size();
1495
1496 #ifdef ASSERT
1497 if (n->size(_regalloc) < (current_offset-instr_offset)) {
1498 n->dump();
1499 assert(false, "wrong size of mach node");
1500 }
1501 #endif
1398 non_safepoints.observe_instruction(n, current_offset); 1502 non_safepoints.observe_instruction(n, current_offset);
1399 1503
1400 // mcall is last "call" that can be a safepoint 1504 // mcall is last "call" that can be a safepoint
1401 // record it so we can see if a poll will directly follow it 1505 // record it so we can see if a poll will directly follow it
1402 // in which case we'll need a pad to make the PcDesc sites unique 1506 // in which case we'll need a pad to make the PcDesc sites unique
1406 1510
1407 if (is_mcall) { 1511 if (is_mcall) {
1408 last_call_offset = current_offset; 1512 last_call_offset = current_offset;
1409 } 1513 }
1410 1514
1515 if (n->is_Mach() && n->as_Mach()->avoid_back_to_back()) {
1516 // Avoid back to back some instructions.
1517 last_avoid_back_to_back_offset = current_offset;
1518 }
1519
1411 // See if this instruction has a delay slot 1520 // See if this instruction has a delay slot
1412 if ( valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) { 1521 if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
1413 assert(delay_slot != NULL, "expecting delay slot node"); 1522 assert(delay_slot != NULL, "expecting delay slot node");
1414 1523
1415 // Back up 1 instruction 1524 // Back up 1 instruction
1416 cb->set_insts_end(cb->insts_end() - Pipeline::instr_unit_size()); 1525 cb->set_insts_end(cb->insts_end() - Pipeline::instr_unit_size());
1417 1526
1418 // Save the offset for the listing 1527 // Save the offset for the listing
1419 #ifndef PRODUCT 1528 #ifndef PRODUCT
1420 if( node_offsets && delay_slot->_idx < node_offset_limit ) 1529 if (node_offsets && delay_slot->_idx < node_offset_limit)
1421 node_offsets[delay_slot->_idx] = cb->insts_size(); 1530 node_offsets[delay_slot->_idx] = cb->insts_size();
1422 #endif 1531 #endif
1423 1532
1424 // Support a SafePoint in the delay slot 1533 // Support a SafePoint in the delay slot
1425 if( delay_slot->is_MachSafePoint() ) { 1534 if (delay_slot->is_MachSafePoint()) {
1426 MachNode *mach = delay_slot->as_Mach(); 1535 MachNode *mach = delay_slot->as_Mach();
1427 // !!!!! Stubs only need an oopmap right now, so bail out 1536 // !!!!! Stubs only need an oopmap right now, so bail out
1428 if( !mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL ) { 1537 if (!mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL) {
1429 // Write the oopmap directly to the code blob??!! 1538 // Write the oopmap directly to the code blob??!!
1430 # ifdef ENABLE_ZAP_DEAD_LOCALS 1539 # ifdef ENABLE_ZAP_DEAD_LOCALS
1431 assert( !is_node_getting_a_safepoint(mach), "logic does not match; false positive"); 1540 assert( !is_node_getting_a_safepoint(mach), "logic does not match; false positive");
1432 # endif 1541 # endif
1433 delay_slot = NULL; 1542 delay_slot = NULL;
1447 // Don't reuse it 1556 // Don't reuse it
1448 delay_slot = NULL; 1557 delay_slot = NULL;
1449 } 1558 }
1450 1559
1451 } // End for all instructions in block 1560 } // End for all instructions in block
1561 assert((uint)blk_offset <= blk_starts[i], "shouldn't increase distance");
1562 blk_starts[i] = blk_offset;
1452 1563
1453 // If the next block is the top of a loop, pad this block out to align 1564 // If the next block is the top of a loop, pad this block out to align
1454 // the loop top a little. Helps prevent pipe stalls at loop back branches. 1565 // the loop top a little. Helps prevent pipe stalls at loop back branches.
1455 if( i<_cfg->_num_blocks-1 ) { 1566 if (i < nblocks-1) {
1456 Block *nb = _cfg->_blocks[i+1]; 1567 Block *nb = _cfg->_blocks[i+1];
1457 uint padding = nb->alignment_padding(current_offset); 1568 int padding = nb->alignment_padding(current_offset);
1458 if( padding > 0 ) { 1569 if( padding > 0 ) {
1459 MachNode *nop = new (this) MachNopNode(padding / nop_size); 1570 MachNode *nop = new (this) MachNopNode(padding / nop_size);
1460 b->_nodes.insert( b->_nodes.size(), nop ); 1571 b->_nodes.insert( b->_nodes.size(), nop );
1461 _cfg->_bbs.map( nop->_idx, b ); 1572 _cfg->_bbs.map( nop->_idx, b );
1462 nop->emit(*cb, _regalloc); 1573 nop->emit(*cb, _regalloc);
1463 current_offset = cb->insts_size(); 1574 current_offset = cb->insts_size();
1464 } 1575 }
1465 } 1576 #ifdef ASSERT
1577 int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit();
1578 block_alignment_padding = (max_loop_pad - padding);
1579 assert(block_alignment_padding >= 0, "sanity");
1580 #endif
1581 }
1582 // Verify that the distance for forward short branches
1583 // generated earlier is still valid.
1584 assert(orig_blk_size >= (current_offset - blk_offset), "shouldn't increase block size");
1466 1585
1467 } // End of for all blocks 1586 } // End of for all blocks
1587 blk_starts[nblocks] = current_offset;
1468 1588
1469 non_safepoints.flush_at_end(); 1589 non_safepoints.flush_at_end();
1470 1590
1471 // Offset too large? 1591 // Offset too large?
1472 if (failing()) return; 1592 if (failing()) return;
1473 1593
1474 // Define a pseudo-label at the end of the code 1594 // Define a pseudo-label at the end of the code
1475 MacroAssembler(cb).bind( blk_labels[_cfg->_num_blocks] ); 1595 MacroAssembler(cb).bind( blk_labels[nblocks] );
1476 1596
1477 // Compute the size of the first block 1597 // Compute the size of the first block
1478 _first_block_size = blk_labels[1].loc_pos() - blk_labels[0].loc_pos(); 1598 _first_block_size = blk_labels[1].loc_pos() - blk_labels[0].loc_pos();
1479 1599
1480 assert(cb->insts_size() < 500000, "method is unreasonably large"); 1600 assert(cb->insts_size() < 500000, "method is unreasonably large");
1601
1602 #ifdef ASSERT
1603 for (uint i = 0; i < nblocks; i++) { // For all blocks
1604 if (jmp_target[i] != 0) {
1605 int br_size = jmp_size[i];
1606 int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]);
1607 if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) {
1608 tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]);
1609 assert(false, "Displacement too large for short jmp");
1610 }
1611 }
1612 }
1613 #endif
1481 1614
1482 // ------------------ 1615 // ------------------
1483 1616
1484 #ifndef PRODUCT 1617 #ifndef PRODUCT
1485 // Information on the size of the method, without the extraneous code 1618 // Information on the size of the method, without the extraneous code
1563 1696
1564 // Get the offset of the return from the call 1697 // Get the offset of the return from the call
1565 uint call_return = call_returns[b->_pre_order]; 1698 uint call_return = call_returns[b->_pre_order];
1566 #ifdef ASSERT 1699 #ifdef ASSERT
1567 assert( call_return > 0, "no call seen for this basic block" ); 1700 assert( call_return > 0, "no call seen for this basic block" );
1568 while( b->_nodes[--j]->Opcode() == Op_MachProj ) ; 1701 while( b->_nodes[--j]->is_MachProj() ) ;
1569 assert( b->_nodes[j]->is_Call(), "CatchProj must follow call" ); 1702 assert( b->_nodes[j]->is_MachCall(), "CatchProj must follow call" );
1570 #endif 1703 #endif
1571 // last instruction is a CatchNode, find its CatchProjNodes 1704 // last instruction is a CatchNode, find its CatchProjNodes
1572 int nof_succs = b->_num_succs; 1705 int nof_succs = b->_num_succs;
1573 // allocate space 1706 // allocate space
1574 GrowableArray<intptr_t> handler_bcis(nof_succs); 1707 GrowableArray<intptr_t> handler_bcis(nof_succs);
1741 NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); ) 1874 NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )
1742 1875
1743 // Create a data structure for all the scheduling information 1876 // Create a data structure for all the scheduling information
1744 Scheduling scheduling(Thread::current()->resource_area(), *this); 1877 Scheduling scheduling(Thread::current()->resource_area(), *this);
1745 1878
1746 // Initialize the space for the BufferBlob used to find and verify
1747 // instruction size in MachNode::emit_size()
1748 init_scratch_buffer_blob(MAX_const_size);
1749 if (failing()) return; // Out of memory
1750
1751 // Walk backwards over each basic block, computing the needed alignment 1879 // Walk backwards over each basic block, computing the needed alignment
1752 // Walk over all the basic blocks 1880 // Walk over all the basic blocks
1753 scheduling.DoScheduling(); 1881 scheduling.DoScheduling();
1754 } 1882 }
1755 1883
2026 // branch, OR a conditionally executed instruction if 2154 // branch, OR a conditionally executed instruction if
2027 // the branch is taken. In practice, this means that 2155 // the branch is taken. In practice, this means that
2028 // the first instruction at the branch target is 2156 // the first instruction at the branch target is
2029 // copied to the delay slot, and the branch goes to 2157 // copied to the delay slot, and the branch goes to
2030 // the instruction after that at the branch target 2158 // the instruction after that at the branch target
2031 if ( n->is_Mach() && n->is_Branch() ) { 2159 if ( n->is_MachBranch() ) {
2032 2160
2033 assert( !n->is_MachNullCheck(), "should not look for delay slot for Null Check" ); 2161 assert( !n->is_MachNullCheck(), "should not look for delay slot for Null Check" );
2034 assert( !n->is_Catch(), "should not look for delay slot for Catch" ); 2162 assert( !n->is_Catch(), "should not look for delay slot for Catch" );
2035 2163
2036 #ifndef PRODUCT 2164 #ifndef PRODUCT
2344 // normally schedule conditional branches (despite them being forced last 2472 // normally schedule conditional branches (despite them being forced last
2345 // in the block), because they have delay slots we can fill. Calls all 2473 // in the block), because they have delay slots we can fill. Calls all
2346 // have their delay slots filled in the template expansions, so we don't 2474 // have their delay slots filled in the template expansions, so we don't
2347 // bother scheduling them. 2475 // bother scheduling them.
2348 Node *last = bb->_nodes[_bb_end]; 2476 Node *last = bb->_nodes[_bb_end];
2477 // Ignore trailing NOPs.
2478 while (_bb_end > 0 && last->is_Mach() &&
2479 last->as_Mach()->ideal_Opcode() == Op_Con) {
2480 last = bb->_nodes[--_bb_end];
2481 }
2482 assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, "");
2349 if( last->is_Catch() || 2483 if( last->is_Catch() ||
2350 // Exclude unreachable path case when Halt node is in a separate block. 2484 // Exclude unreachable path case when Halt node is in a separate block.
2351 (_bb_end > 1 && last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) { 2485 (_bb_end > 1 && last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
2352 // There must be a prior call. Skip it. 2486 // There must be a prior call. Skip it.
2353 while( !bb->_nodes[--_bb_end]->is_Call() ) { 2487 while( !bb->_nodes[--_bb_end]->is_MachCall() ) {
2354 assert( bb->_nodes[_bb_end]->is_Proj(), "skipping projections after expected call" ); 2488 assert( bb->_nodes[_bb_end]->is_MachProj(), "skipping projections after expected call" );
2355 } 2489 }
2356 } else if( last->is_MachNullCheck() ) { 2490 } else if( last->is_MachNullCheck() ) {
2357 // Backup so the last null-checked memory instruction is 2491 // Backup so the last null-checked memory instruction is
2358 // outside the schedulable range. Skip over the nullcheck, 2492 // outside the schedulable range. Skip over the nullcheck,
2359 // projection, and the memory nodes. 2493 // projection, and the memory nodes.
2661 Node* end_node = (_bb_end-1 >= _bb_start) ? b->_nodes[last_safept] : NULL; 2795 Node* end_node = (_bb_end-1 >= _bb_start) ? b->_nodes[last_safept] : NULL;
2662 Node* last_safept_node = end_node; 2796 Node* last_safept_node = end_node;
2663 for( uint i = _bb_end-1; i >= _bb_start; i-- ) { 2797 for( uint i = _bb_end-1; i >= _bb_start; i-- ) {
2664 Node *n = b->_nodes[i]; 2798 Node *n = b->_nodes[i];
2665 int is_def = n->outcnt(); // def if some uses prior to adding precedence edges 2799 int is_def = n->outcnt(); // def if some uses prior to adding precedence edges
2666 if( n->Opcode() == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) { 2800 if( n->is_MachProj() && n->ideal_reg() == MachProjNode::fat_proj ) {
2667 // Fat-proj kills a slew of registers 2801 // Fat-proj kills a slew of registers
2668 // This can add edges to 'n' and obscure whether or not it was a def, 2802 // This can add edges to 'n' and obscure whether or not it was a def,
2669 // hence the is_def flag. 2803 // hence the is_def flag.
2670 fat_proj_seen = true; 2804 fat_proj_seen = true;
2671 RegMask rm = n->out_RegMask();// Make local copy 2805 RegMask rm = n->out_RegMask();// Make local copy
2678 // Get DEF'd registers the normal way 2812 // Get DEF'd registers the normal way
2679 anti_do_def( b, n, _regalloc->get_reg_first(n), is_def ); 2813 anti_do_def( b, n, _regalloc->get_reg_first(n), is_def );
2680 anti_do_def( b, n, _regalloc->get_reg_second(n), is_def ); 2814 anti_do_def( b, n, _regalloc->get_reg_second(n), is_def );
2681 } 2815 }
2682 2816
2817 // Kill projections on a branch should appear to occur on the
2818 // branch, not afterwards, so grab the masks from the projections
2819 // and process them.
2820 if (n->is_MachBranch() || n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_Jump) {
2821 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
2822 Node* use = n->fast_out(i);
2823 if (use->is_Proj()) {
2824 RegMask rm = use->out_RegMask();// Make local copy
2825 while( rm.is_NotEmpty() ) {
2826 OptoReg::Name kill = rm.find_first_elem();
2827 rm.Remove(kill);
2828 anti_do_def( b, n, kill, false );
2829 }
2830 }
2831 }
2832 }
2833
2683 // Check each register used by this instruction for a following DEF/KILL 2834 // Check each register used by this instruction for a following DEF/KILL
2684 // that must occur afterward and requires an anti-dependence edge. 2835 // that must occur afterward and requires an anti-dependence edge.
2685 for( uint j=0; j<n->req(); j++ ) { 2836 for( uint j=0; j<n->req(); j++ ) {
2686 Node *def = n->in(j); 2837 Node *def = n->in(j);
2687 if( def ) { 2838 if( def ) {
2688 assert( def->Opcode() != Op_MachProj || def->ideal_reg() != MachProjNode::fat_proj, "" ); 2839 assert( !def->is_MachProj() || def->ideal_reg() != MachProjNode::fat_proj, "" );
2689 anti_do_use( b, n, _regalloc->get_reg_first(def) ); 2840 anti_do_use( b, n, _regalloc->get_reg_first(def) );
2690 anti_do_use( b, n, _regalloc->get_reg_second(def) ); 2841 anti_do_use( b, n, _regalloc->get_reg_second(def) );
2691 } 2842 }
2692 } 2843 }
2693 // Do not allow defs of new derived values to float above GC 2844 // Do not allow defs of new derived values to float above GC