# HG changeset patch # User kvn # Date 1313089691 25200 # Node ID 95134e034042e97a1b08d3e4ed9db84744f861c1 # Parent 6987871cfb9b5044dca7a6df0e7afeb0fefaed66 7063629: use cbcond in C2 generated code on T4 Summary: Use new short branch instruction in C2 generated code. Reviewed-by: never diff -r 6987871cfb9b -r 95134e034042 src/cpu/sparc/vm/assembler_sparc.hpp --- a/src/cpu/sparc/vm/assembler_sparc.hpp Wed Aug 10 14:06:57 2011 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp Thu Aug 11 12:08:11 2011 -0700 @@ -1192,6 +1192,8 @@ assert(offset() == 0 || !cbcond_before(), "cbcond should not follow an other cbcond"); } +public: + bool use_cbcond(Label& L) { if (!UseCBCond || cbcond_before()) return false; intptr_t x = intptr_t(target_distance(L)) - intptr_t(pc()); @@ -1199,7 +1201,6 @@ return is_simm(x, 12); } -public: // Tells assembler you know that next instruction is delayed Assembler* delayed() { #ifdef CHECK_DELAY @@ -1248,6 +1249,10 @@ inline void bpr(RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none); inline void bpr(RCondition c, bool a, Predict p, Register s1, Label& L); + // compare and branch + inline void cbcond(Condition c, CC cc, Register s1, Register s2, Label& L); + inline void cbcond(Condition c, CC cc, Register s1, int simm5, Label& L); + protected: // use MacroAssembler::br instead // pp 138 @@ -1275,10 +1280,6 @@ inline void cb( Condition c, bool a, address d, relocInfo::relocType rt = relocInfo::none ); inline void cb( Condition c, bool a, Label& L ); - // compare and branch - inline void cbcond(Condition c, CC cc, Register s1, Register s2, Label& L); - inline void cbcond(Condition c, CC cc, Register s1, int simm5, Label& L); - // pp 149 inline void call( address d, relocInfo::relocType rt = relocInfo::runtime_call_type ); diff -r 6987871cfb9b -r 95134e034042 src/cpu/sparc/vm/sparc.ad --- a/src/cpu/sparc/vm/sparc.ad Wed Aug 10 14:06:57 2011 -0700 +++ b/src/cpu/sparc/vm/sparc.ad Thu Aug 11 12:08:11 2011 
-0700 @@ -1834,8 +1834,10 @@ // // NOTE: If the platform does not provide any short branch variants, then // this method should return false for offset 0. -bool Matcher::is_short_branch_offset(int rule, int offset) { - return false; +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + // The passed offset is relative to address of the branch. + // Don't need to adjust the offset. + return UseCBCond && Assembler::is_simm(offset, 12); } const bool Matcher::isSimpleConstant64(jlong value) { @@ -3315,6 +3317,7 @@ //----------Instruction Attributes--------------------------------------------- ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute ins_attrib ins_size(32); // Required size attribute (in bits) +ins_attrib ins_avoid_back_to_back(0); // instruction should not be generated back to back ins_attrib ins_short_branch(0); // Required flag: is this instruction a // non-matching short branch variant of some // long branch? @@ -3402,6 +3405,15 @@ interface(CONST_INTER); %} +// Integer Immediate: 5-bit +operand immI5() %{ + predicate(Assembler::is_simm(n->get_int(), 5)); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + // Integer Immediate: 0-bit operand immI0() %{ predicate(n->get_int() == 0); @@ -3625,6 +3637,15 @@ interface(CONST_INTER); %} +// Long Immediate: 5-bit +operand immL5() %{ + predicate(n->get_long() == (int)n->get_long() && Assembler::is_simm((int)n->get_long(), 5)); + match(ConL); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + // Long Immediate: 13-bit operand immL13() %{ predicate((-4096L < n->get_long()) && (n->get_long() <= 4095L)); @@ -5157,6 +5178,42 @@ MS : R; %} +// Compare and branch +pipe_class cmp_br_reg_reg(Universe br, cmpOp cmp, iRegI src1, iRegI src2, label labl, flagsReg cr) %{ + instruction_count(2); has_delay_slot; + cr : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; + BR : R; +%} + +// Compare and branch +pipe_class cmp_br_reg_imm(Universe br, cmpOp 
cmp, iRegI src1, immI13 src2, label labl, flagsReg cr) %{ + instruction_count(2); has_delay_slot; + cr : E(write); + src1 : R(read); + IALU : R; + BR : R; +%} + +// Compare and branch using cbcond +pipe_class cbcond_reg_reg(Universe br, cmpOp cmp, iRegI src1, iRegI src2, label labl) %{ + single_instruction; + src1 : E(read); + src2 : E(read); + IALU : R; + BR : R; +%} + +// Compare and branch using cbcond +pipe_class cbcond_reg_imm(Universe br, cmpOp cmp, iRegI src1, immI5 src2, label labl) %{ + single_instruction; + src1 : E(read); + IALU : R; + BR : R; +%} + pipe_class br_fcc(Universe br, cmpOpF cc, flagsReg cr, label labl) %{ single_instruction_with_delay_slot; cr : E(read); @@ -9198,6 +9255,25 @@ ins_pipe(br); %} +// Direct Branch, short with no delay slot +instruct branch_short(label labl) %{ + match(Goto); + predicate(UseCBCond); + effect(USE labl); + + size(4); + ins_cost(BRANCH_COST); + format %{ "BA $labl\t! short branch" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ ba_short(*L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(1); + ins_pipe(cbcond_reg_imm); +%} + // Conditional Direct Branch instruct branchCon(cmpOp cmp, flagsReg icc, label labl) %{ match(If cmp icc); @@ -9211,50 +9287,11 @@ ins_pipe(br_cc); %} -// Branch-on-register tests all 64 bits. We assume that values -// in 64-bit registers always remains zero or sign extended -// unless our code munges the high bits. Interrupts can chop -// the high order bits to zero or sign at any time. 
-instruct branchCon_regI(cmpOp_reg cmp, iRegI op1, immI0 zero, label labl) %{ - match(If cmp (CmpI op1 zero)); - predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); - effect(USE labl); - - size(8); - ins_cost(BRANCH_COST); - format %{ "BR$cmp $op1,$labl" %} - ins_encode( enc_bpr( labl, cmp, op1 ) ); - ins_pipe(br_reg); -%} - -instruct branchCon_regP(cmpOp_reg cmp, iRegP op1, immP0 null, label labl) %{ - match(If cmp (CmpP op1 null)); - predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); - effect(USE labl); - - size(8); - ins_cost(BRANCH_COST); - format %{ "BR$cmp $op1,$labl" %} - ins_encode( enc_bpr( labl, cmp, op1 ) ); - ins_pipe(br_reg); -%} - -instruct branchCon_regL(cmpOp_reg cmp, iRegL op1, immL0 zero, label labl) %{ - match(If cmp (CmpL op1 zero)); - predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); - effect(USE labl); - - size(8); - ins_cost(BRANCH_COST); - format %{ "BR$cmp $op1,$labl" %} - ins_encode( enc_bpr( labl, cmp, op1 ) ); - ins_pipe(br_reg); -%} - instruct branchConU(cmpOpU cmp, flagsRegU icc, label labl) %{ match(If cmp icc); effect(USE labl); + ins_cost(BRANCH_COST); format %{ "BP$cmp $icc,$labl" %} // Prim = bits 24-22, Secnd = bits 31-30 ins_encode( enc_bp( labl, cmp, icc ) ); @@ -9321,6 +9358,506 @@ ins_pipe(br_cc); %} +// Compare and branch instructions +instruct cmpI_reg_branch(cmpOp cmp, iRegI op1, iRegI op2, label labl, flagsReg icc) %{ + match(If cmp (CmpI op1 op2)); + effect(USE labl, KILL icc); + + size(12); + ins_cost(BRANCH_COST); + format %{ "CMP $op1,$op2\t! int\n\t" + "BP$cmp $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? 
Assembler::pt : Assembler::pn; + __ cmp($op1$$Register, $op2$$Register); + __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L); + __ delayed()->nop(); + %} + ins_pipe(cmp_br_reg_reg); +%} + +instruct cmpI_imm_branch(cmpOp cmp, iRegI op1, immI5 op2, label labl, flagsReg icc) %{ + match(If cmp (CmpI op1 op2)); + effect(USE labl, KILL icc); + + size(12); + ins_cost(BRANCH_COST); + format %{ "CMP $op1,$op2\t! int\n\t" + "BP$cmp $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + __ cmp($op1$$Register, $op2$$constant); + __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L); + __ delayed()->nop(); + %} + ins_pipe(cmp_br_reg_imm); +%} + +instruct cmpU_reg_branch(cmpOpU cmp, iRegI op1, iRegI op2, label labl, flagsRegU icc) %{ + match(If cmp (CmpU op1 op2)); + effect(USE labl, KILL icc); + + size(12); + ins_cost(BRANCH_COST); + format %{ "CMP $op1,$op2\t! unsigned\n\t" + "BP$cmp $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + __ cmp($op1$$Register, $op2$$Register); + __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L); + __ delayed()->nop(); + %} + ins_pipe(cmp_br_reg_reg); +%} + +instruct cmpU_imm_branch(cmpOpU cmp, iRegI op1, immI5 op2, label labl, flagsRegU icc) %{ + match(If cmp (CmpU op1 op2)); + effect(USE labl, KILL icc); + + size(12); + ins_cost(BRANCH_COST); + format %{ "CMP $op1,$op2\t! unsigned\n\t" + "BP$cmp $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? 
Assembler::pt : Assembler::pn; + __ cmp($op1$$Register, $op2$$constant); + __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L); + __ delayed()->nop(); + %} + ins_pipe(cmp_br_reg_imm); +%} + +instruct cmpL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, flagsRegL xcc) %{ + match(If cmp (CmpL op1 op2)); + effect(USE labl, KILL xcc); + + size(12); + ins_cost(BRANCH_COST); + format %{ "CMP $op1,$op2\t! long\n\t" + "BP$cmp $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + __ cmp($op1$$Register, $op2$$Register); + __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L); + __ delayed()->nop(); + %} + ins_pipe(cmp_br_reg_reg); +%} + +instruct cmpL_imm_branch(cmpOp cmp, iRegL op1, immL5 op2, label labl, flagsRegL xcc) %{ + match(If cmp (CmpL op1 op2)); + effect(USE labl, KILL xcc); + + size(12); + ins_cost(BRANCH_COST); + format %{ "CMP $op1,$op2\t! long\n\t" + "BP$cmp $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + __ cmp($op1$$Register, $op2$$constant); + __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L); + __ delayed()->nop(); + %} + ins_pipe(cmp_br_reg_imm); +%} + +// Compare Pointers and branch +instruct cmpP_reg_branch(cmpOpP cmp, iRegP op1, iRegP op2, label labl, flagsRegP pcc) %{ + match(If cmp (CmpP op1 op2)); + effect(USE labl, KILL pcc); + + size(12); + ins_cost(BRANCH_COST); + format %{ "CMP $op1,$op2\t! ptr\n\t" + "B$cmp $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? 
Assembler::pt : Assembler::pn; + __ cmp($op1$$Register, $op2$$Register); + __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, *L); + __ delayed()->nop(); + %} + ins_pipe(cmp_br_reg_reg); +%} + +instruct cmpP_null_branch(cmpOpP cmp, iRegP op1, immP0 null, label labl, flagsRegP pcc) %{ + match(If cmp (CmpP op1 null)); + effect(USE labl, KILL pcc); + + size(12); + ins_cost(BRANCH_COST); + format %{ "CMP $op1,0\t! ptr\n\t" + "B$cmp $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + __ cmp($op1$$Register, G0); + // bpr() is not used here since it has shorter distance. + __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, *L); + __ delayed()->nop(); + %} + ins_pipe(cmp_br_reg_reg); +%} + +instruct cmpN_reg_branch(cmpOp cmp, iRegN op1, iRegN op2, label labl, flagsReg icc) %{ + match(If cmp (CmpN op1 op2)); + effect(USE labl, KILL icc); + + size(12); + ins_cost(BRANCH_COST); + format %{ "CMP $op1,$op2\t! compressed ptr\n\t" + "BP$cmp $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + __ cmp($op1$$Register, $op2$$Register); + __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L); + __ delayed()->nop(); + %} + ins_pipe(cmp_br_reg_reg); +%} + +instruct cmpN_null_branch(cmpOp cmp, iRegN op1, immN0 null, label labl, flagsReg icc) %{ + match(If cmp (CmpN op1 null)); + effect(USE labl, KILL icc); + + size(12); + ins_cost(BRANCH_COST); + format %{ "CMP $op1,0\t! compressed ptr\n\t" + "BP$cmp $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? 
Assembler::pt : Assembler::pn; + __ cmp($op1$$Register, G0); + __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L); + __ delayed()->nop(); + %} + ins_pipe(cmp_br_reg_reg); +%} + +// Loop back branch +instruct cmpI_reg_branchLoopEnd(cmpOp cmp, iRegI op1, iRegI op2, label labl, flagsReg icc) %{ + match(CountedLoopEnd cmp (CmpI op1 op2)); + effect(USE labl, KILL icc); + + size(12); + ins_cost(BRANCH_COST); + format %{ "CMP $op1,$op2\t! int\n\t" + "BP$cmp $labl\t! Loop end" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + __ cmp($op1$$Register, $op2$$Register); + __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L); + __ delayed()->nop(); + %} + ins_pipe(cmp_br_reg_reg); +%} + +instruct cmpI_imm_branchLoopEnd(cmpOp cmp, iRegI op1, immI5 op2, label labl, flagsReg icc) %{ + match(CountedLoopEnd cmp (CmpI op1 op2)); + effect(USE labl, KILL icc); + + size(12); + ins_cost(BRANCH_COST); + format %{ "CMP $op1,$op2\t! int\n\t" + "BP$cmp $labl\t! Loop end" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + __ cmp($op1$$Register, $op2$$constant); + __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L); + __ delayed()->nop(); + %} + ins_pipe(cmp_br_reg_imm); +%} + +// Short compare and branch instructions +instruct cmpI_reg_branch_short(cmpOp cmp, iRegI op1, iRegI op2, label labl, flagsReg icc) %{ + match(If cmp (CmpI op1 op2)); + predicate(UseCBCond); + effect(USE labl, KILL icc); + + size(4); + ins_cost(BRANCH_COST); + format %{ "CWB$cmp $op1,$op2,$labl\t! 
int" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(1); + ins_pipe(cbcond_reg_reg); +%} + +instruct cmpI_imm_branch_short(cmpOp cmp, iRegI op1, immI5 op2, label labl, flagsReg icc) %{ + match(If cmp (CmpI op1 op2)); + predicate(UseCBCond); + effect(USE labl, KILL icc); + + size(4); + ins_cost(BRANCH_COST); + format %{ "CWB$cmp $op1,$op2,$labl\t! int" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(1); + ins_pipe(cbcond_reg_imm); +%} + +instruct cmpU_reg_branch_short(cmpOpU cmp, iRegI op1, iRegI op2, label labl, flagsRegU icc) %{ + match(If cmp (CmpU op1 op2)); + predicate(UseCBCond); + effect(USE labl, KILL icc); + + size(4); + ins_cost(BRANCH_COST); + format %{ "CWB$cmp $op1,$op2,$labl\t! unsigned" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(1); + ins_pipe(cbcond_reg_reg); +%} + +instruct cmpU_imm_branch_short(cmpOpU cmp, iRegI op1, immI5 op2, label labl, flagsRegU icc) %{ + match(If cmp (CmpU op1 op2)); + predicate(UseCBCond); + effect(USE labl, KILL icc); + + size(4); + ins_cost(BRANCH_COST); + format %{ "CWB$cmp $op1,$op2,$labl\t! 
unsigned" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(1); + ins_pipe(cbcond_reg_imm); +%} + +instruct cmpL_reg_branch_short(cmpOp cmp, iRegL op1, iRegL op2, label labl, flagsRegL xcc) %{ + match(If cmp (CmpL op1 op2)); + predicate(UseCBCond); + effect(USE labl, KILL xcc); + + size(4); + ins_cost(BRANCH_COST); + format %{ "CXB$cmp $op1,$op2,$labl\t! long" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::xcc, $op1$$Register, $op2$$Register, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(1); + ins_pipe(cbcond_reg_reg); +%} + +instruct cmpL_imm_branch_short(cmpOp cmp, iRegL op1, immL5 op2, label labl, flagsRegL xcc) %{ + match(If cmp (CmpL op1 op2)); + predicate(UseCBCond); + effect(USE labl, KILL xcc); + + size(4); + ins_cost(BRANCH_COST); + format %{ "CXB$cmp $op1,$op2,$labl\t! long" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::xcc, $op1$$Register, $op2$$constant, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(1); + ins_pipe(cbcond_reg_imm); +%} + +// Compare Pointers and branch +instruct cmpP_reg_branch_short(cmpOpP cmp, iRegP op1, iRegP op2, label labl, flagsRegP pcc) %{ + match(If cmp (CmpP op1 op2)); + predicate(UseCBCond); + effect(USE labl, KILL pcc); + + size(4); + ins_cost(BRANCH_COST); +#ifdef _LP64 + format %{ "CXB$cmp $op1,$op2,$labl\t! ptr" %} +#else + format %{ "CWB$cmp $op1,$op2,$labl\t! 
ptr" %} +#endif + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::ptr_cc, $op1$$Register, $op2$$Register, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(1); + ins_pipe(cbcond_reg_reg); +%} + +instruct cmpP_null_branch_short(cmpOpP cmp, iRegP op1, immP0 null, label labl, flagsRegP pcc) %{ + match(If cmp (CmpP op1 null)); + predicate(UseCBCond); + effect(USE labl, KILL pcc); + + size(4); + ins_cost(BRANCH_COST); +#ifdef _LP64 + format %{ "CXB$cmp $op1,0,$labl\t! ptr" %} +#else + format %{ "CWB$cmp $op1,0,$labl\t! ptr" %} +#endif + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::ptr_cc, $op1$$Register, G0, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(1); + ins_pipe(cbcond_reg_reg); +%} + +instruct cmpN_reg_branch_short(cmpOp cmp, iRegN op1, iRegN op2, label labl, flagsReg icc) %{ + match(If cmp (CmpN op1 op2)); + predicate(UseCBCond); + effect(USE labl, KILL icc); + + size(4); + ins_cost(BRANCH_COST); + format %{ "CWB$cmp $op1,$op2,$labl\t! compressed ptr" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(1); + ins_pipe(cbcond_reg_reg); +%} + +instruct cmpN_null_branch_short(cmpOp cmp, iRegN op1, immN0 null, label labl, flagsReg icc) %{ + match(If cmp (CmpN op1 null)); + predicate(UseCBCond); + effect(USE labl, KILL icc); + + size(4); + ins_cost(BRANCH_COST); + format %{ "CWB$cmp $op1,0,$labl\t! 
compressed ptr" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, G0, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(1); + ins_pipe(cbcond_reg_reg); +%} + +// Loop back branch +instruct cmpI_reg_branchLoopEnd_short(cmpOp cmp, iRegI op1, iRegI op2, label labl, flagsReg icc) %{ + match(CountedLoopEnd cmp (CmpI op1 op2)); + predicate(UseCBCond); + effect(USE labl, KILL icc); + + size(4); + ins_cost(BRANCH_COST); + format %{ "CWB$cmp $op1,$op2,$labl\t! Loop end" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(1); + ins_pipe(cbcond_reg_reg); +%} + +instruct cmpI_imm_branchLoopEnd_short(cmpOp cmp, iRegI op1, immI5 op2, label labl, flagsReg icc) %{ + match(CountedLoopEnd cmp (CmpI op1 op2)); + predicate(UseCBCond); + effect(USE labl, KILL icc); + + size(4); + ins_cost(BRANCH_COST); + format %{ "CWB$cmp $op1,$op2,$labl\t! Loop end" %} + ins_encode %{ + Label* L = $labl$$label; + assert(__ use_cbcond(*L), "back to back cbcond"); + __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L); + %} + ins_short_branch(1); + ins_avoid_back_to_back(1); + ins_pipe(cbcond_reg_imm); +%} + +// Branch-on-register tests all 64 bits. We assume that values +// in 64-bit registers always remains zero or sign extended +// unless our code munges the high bits. Interrupts can chop +// the high order bits to zero or sign at any time. 
+instruct branchCon_regI(cmpOp_reg cmp, iRegI op1, immI0 zero, label labl) %{ + match(If cmp (CmpI op1 zero)); + predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + size(8); + ins_cost(BRANCH_COST); + format %{ "BR$cmp $op1,$labl" %} + ins_encode( enc_bpr( labl, cmp, op1 ) ); + ins_pipe(br_reg); +%} + +instruct branchCon_regP(cmpOp_reg cmp, iRegP op1, immP0 null, label labl) %{ + match(If cmp (CmpP op1 null)); + predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + size(8); + ins_cost(BRANCH_COST); + format %{ "BR$cmp $op1,$labl" %} + ins_encode( enc_bpr( labl, cmp, op1 ) ); + ins_pipe(br_reg); +%} + +instruct branchCon_regL(cmpOp_reg cmp, iRegL op1, immL0 zero, label labl) %{ + match(If cmp (CmpL op1 zero)); + predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + size(8); + ins_cost(BRANCH_COST); + format %{ "BR$cmp $op1,$labl" %} + ins_encode( enc_bpr( labl, cmp, op1 ) ); + ins_pipe(br_reg); +%} + + // ============================================================================ // Long Compare // diff -r 6987871cfb9b -r 95134e034042 src/cpu/sparc/vm/vm_version_sparc.cpp --- a/src/cpu/sparc/vm/vm_version_sparc.cpp Wed Aug 10 14:06:57 2011 -0700 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Aug 11 12:08:11 2011 -0700 @@ -144,8 +144,13 @@ // Currently not supported anywhere. FLAG_SET_DEFAULT(UseFPUForSpilling, false); + + assert((InteriorEntryAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); #endif + assert((CodeEntryAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); + assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); + char buf[512]; jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s", (has_v9() ? ", v9" : (has_v8() ? 
", v8" : "")), diff -r 6987871cfb9b -r 95134e034042 src/cpu/x86/vm/assembler_x86.cpp --- a/src/cpu/x86/vm/assembler_x86.cpp Wed Aug 10 14:06:57 2011 -0700 +++ b/src/cpu/x86/vm/assembler_x86.cpp Thu Aug 11 12:08:11 2011 -0700 @@ -1339,9 +1339,8 @@ emit_operand(rax, dst); } -void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) { - InstructionMark im(this); - relocate(rtype); +void Assembler::jcc(Condition cc, Label& L, bool maybe_short) { + InstructionMark im(this); assert((0 <= cc) && (cc < 16), "illegal cc"); if (L.is_bound()) { address dst = target(L); @@ -1350,7 +1349,7 @@ const int short_size = 2; const int long_size = 6; intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos; - if (rtype == relocInfo::none && is8bit(offs - short_size)) { + if (maybe_short && is8bit(offs - short_size)) { // 0111 tttn #8-bit disp emit_byte(0x70 | cc); emit_byte((offs - short_size) & 0xFF); @@ -1399,7 +1398,7 @@ emit_operand(rsp, adr); } -void Assembler::jmp(Label& L, relocInfo::relocType rtype) { +void Assembler::jmp(Label& L, bool maybe_short) { if (L.is_bound()) { address entry = target(L); assert(entry != NULL, "jmp most probably wrong"); @@ -1407,7 +1406,7 @@ const int short_size = 2; const int long_size = 5; intptr_t offs = entry - _code_pos; - if (rtype == relocInfo::none && is8bit(offs - short_size)) { + if (maybe_short && is8bit(offs - short_size)) { emit_byte(0xEB); emit_byte((offs - short_size) & 0xFF); } else { @@ -1420,7 +1419,6 @@ // the forward jump will not run beyond 256 bytes, use jmpb to // force an 8-bit displacement. 
InstructionMark im(this); - relocate(rtype); L.add_patch_at(code(), locator()); emit_byte(0xE9); emit_long(0); diff -r 6987871cfb9b -r 95134e034042 src/cpu/x86/vm/assembler_x86.hpp --- a/src/cpu/x86/vm/assembler_x86.hpp Wed Aug 10 14:06:57 2011 -0700 +++ b/src/cpu/x86/vm/assembler_x86.hpp Thu Aug 11 12:08:11 2011 -0700 @@ -1065,8 +1065,7 @@ // Note: The same Label can be used for forward and backward branches // but it may be bound only once. - void jcc(Condition cc, Label& L, - relocInfo::relocType rtype = relocInfo::none); + void jcc(Condition cc, Label& L, bool maybe_short = true); // Conditional jump to a 8-bit offset to L. // WARNING: be very careful using this for forward jumps. If the label is @@ -1077,7 +1076,7 @@ void jmp(Address entry); // pc <- entry // Label operations & relative jumps (PPUM Appendix D) - void jmp(Label& L, relocInfo::relocType rtype = relocInfo::none); // unconditional jump to L + void jmp(Label& L, bool maybe_short = true); // unconditional jump to L void jmp(Register entry); // pc <- entry diff -r 6987871cfb9b -r 95134e034042 src/cpu/x86/vm/x86_32.ad --- a/src/cpu/x86/vm/x86_32.ad Wed Aug 10 14:06:57 2011 -0700 +++ b/src/cpu/x86/vm/x86_32.ad Thu Aug 11 12:08:11 2011 -0700 @@ -1369,7 +1369,12 @@ // // NOTE: If the platform does not provide any short branch variants, then // this method should return false for offset 0. -bool Matcher::is_short_branch_offset(int rule, int offset) { +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + // The passed offset is relative to the address of the branch. + // On x86 a branch displacement is calculated relative to the address + // of the next instruction. 
+ offset -= br_size; + // the short version of jmpConUCF2 contains multiple branches, // making the reach slightly less if (rule == jmpConUCF2_rule) @@ -1713,18 +1718,6 @@ else emit_d32(cbuf,con); %} - enc_class Lbl (label labl) %{ // GOTO - Label *l = $labl$$label; - emit_d32(cbuf, (l->loc_pos() - (cbuf.insts_size()+4))); - %} - - enc_class LblShort (label labl) %{ // GOTO - Label *l = $labl$$label; - int disp = l->loc_pos() - (cbuf.insts_size()+1); - assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); - emit_d8(cbuf, disp); - %} - enc_class OpcSReg (eRegI dst) %{ // BSWAP emit_cc(cbuf, $secondary, $dst$$reg ); %} @@ -1747,21 +1740,6 @@ emit_rm(cbuf, 0x3, $secondary, $div$$reg ); %} - enc_class Jcc (cmpOp cop, label labl) %{ // JCC - Label *l = $labl$$label; - $$$emit8$primary; - emit_cc(cbuf, $secondary, $cop$$cmpcode); - emit_d32(cbuf, (l->loc_pos() - (cbuf.insts_size()+4))); - %} - - enc_class JccShort (cmpOp cop, label labl) %{ // JCC - Label *l = $labl$$label; - emit_cc(cbuf, $primary, $cop$$cmpcode); - int disp = l->loc_pos() - (cbuf.insts_size()+1); - assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); - emit_d8(cbuf, disp); - %} - enc_class enc_cmov(cmpOp cop ) %{ // CMOV $$$emit8$primary; emit_cc(cbuf, $secondary, $cop$$cmpcode); @@ -13055,8 +13033,10 @@ ins_cost(300); format %{ "JMP $labl" %} size(5); - opcode(0xE9); - ins_encode( OpcP, Lbl( labl ) ); + ins_encode %{ + Label* L = $labl$$label; + __ jmp(*L, false); // Always long jump + %} ins_pipe( pipe_jmp ); %} @@ -13068,8 +13048,10 @@ ins_cost(300); format %{ "J$cop $labl" %} size(6); - opcode(0x0F, 0x80); - ins_encode( Jcc( cop, labl) ); + ins_encode %{ + Label* L = $labl$$label; + __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump + %} ins_pipe( pipe_jcc ); %} @@ -13081,8 +13063,10 @@ ins_cost(300); format %{ "J$cop $labl\t# Loop end" %} size(6); - opcode(0x0F, 0x80); - ins_encode( Jcc( cop, labl) ); + ins_encode %{ + 
Label* L = $labl$$label; + __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump + %} ins_pipe( pipe_jcc ); %} @@ -13094,8 +13078,10 @@ ins_cost(300); format %{ "J$cop,u $labl\t# Loop end" %} size(6); - opcode(0x0F, 0x80); - ins_encode( Jcc( cop, labl) ); + ins_encode %{ + Label* L = $labl$$label; + __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump + %} ins_pipe( pipe_jcc ); %} @@ -13106,8 +13092,10 @@ ins_cost(200); format %{ "J$cop,u $labl\t# Loop end" %} size(6); - opcode(0x0F, 0x80); - ins_encode( Jcc( cop, labl) ); + ins_encode %{ + Label* L = $labl$$label; + __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump + %} ins_pipe( pipe_jcc ); %} @@ -13119,8 +13107,10 @@ ins_cost(300); format %{ "J$cop,u $labl" %} size(6); - opcode(0x0F, 0x80); - ins_encode(Jcc(cop, labl)); + ins_encode %{ + Label* L = $labl$$label; + __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump + %} ins_pipe(pipe_jcc); %} @@ -13131,8 +13121,10 @@ ins_cost(200); format %{ "J$cop,u $labl" %} size(6); - opcode(0x0F, 0x80); - ins_encode(Jcc(cop, labl)); + ins_encode %{ + Label* L = $labl$$label; + __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump + %} ins_pipe(pipe_jcc); %} @@ -13151,28 +13143,19 @@ $$emit$$"done:" } %} - size(12); - opcode(0x0F, 0x80); ins_encode %{ Label* l = $labl$$label; - $$$emit8$primary; - emit_cc(cbuf, $secondary, Assembler::parity); - int parity_disp = -1; - bool ok = false; if ($cop$$cmpcode == Assembler::notEqual) { - // the two jumps 6 bytes apart so the jump distances are too - parity_disp = l->loc_pos() - (cbuf.insts_size() + 4); + __ jcc(Assembler::parity, *l, false); + __ jcc(Assembler::notEqual, *l, false); } else if ($cop$$cmpcode == Assembler::equal) { - parity_disp = 6; - ok = true; + Label done; + __ jccb(Assembler::parity, done); + __ jcc(Assembler::equal, *l, false); + __ bind(done); } else { ShouldNotReachHere(); } - 
emit_d32(cbuf, parity_disp); - $$$emit8$primary; - emit_cc(cbuf, $secondary, $cop$$cmpcode); - int disp = l->loc_pos() - (cbuf.insts_size() + 4); - emit_d32(cbuf, disp); %} ins_pipe(pipe_jcc); %} @@ -13239,8 +13222,10 @@ ins_cost(300); format %{ "JMP,s $labl" %} size(2); - opcode(0xEB); - ins_encode( OpcP, LblShort( labl ) ); + ins_encode %{ + Label* L = $labl$$label; + __ jmpb(*L); + %} ins_pipe( pipe_jmp ); ins_short_branch(1); %} @@ -13253,8 +13238,10 @@ ins_cost(300); format %{ "J$cop,s $labl" %} size(2); - opcode(0x70); - ins_encode( JccShort( cop, labl) ); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} ins_pipe( pipe_jcc ); ins_short_branch(1); %} @@ -13267,8 +13254,10 @@ ins_cost(300); format %{ "J$cop,s $labl\t# Loop end" %} size(2); - opcode(0x70); - ins_encode( JccShort( cop, labl) ); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} ins_pipe( pipe_jcc ); ins_short_branch(1); %} @@ -13281,8 +13270,10 @@ ins_cost(300); format %{ "J$cop,us $labl\t# Loop end" %} size(2); - opcode(0x70); - ins_encode( JccShort( cop, labl) ); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} ins_pipe( pipe_jcc ); ins_short_branch(1); %} @@ -13294,8 +13285,10 @@ ins_cost(300); format %{ "J$cop,us $labl\t# Loop end" %} size(2); - opcode(0x70); - ins_encode( JccShort( cop, labl) ); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} ins_pipe( pipe_jcc ); ins_short_branch(1); %} @@ -13308,8 +13301,10 @@ ins_cost(300); format %{ "J$cop,us $labl" %} size(2); - opcode(0x70); - ins_encode( JccShort( cop, labl) ); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} ins_pipe( pipe_jcc ); ins_short_branch(1); %} @@ -13321,8 +13316,10 @@ ins_cost(300); format %{ "J$cop,us $labl" %} size(2); - opcode(0x70); - ins_encode( JccShort( cop, 
labl) ); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} ins_pipe( pipe_jcc ); ins_short_branch(1); %} @@ -13343,24 +13340,19 @@ } %} size(4); - opcode(0x70); ins_encode %{ Label* l = $labl$$label; - emit_cc(cbuf, $primary, Assembler::parity); - int parity_disp = -1; if ($cop$$cmpcode == Assembler::notEqual) { - parity_disp = l->loc_pos() - (cbuf.insts_size() + 1); + __ jccb(Assembler::parity, *l); + __ jccb(Assembler::notEqual, *l); } else if ($cop$$cmpcode == Assembler::equal) { - parity_disp = 2; + Label done; + __ jccb(Assembler::parity, done); + __ jccb(Assembler::equal, *l); + __ bind(done); } else { - ShouldNotReachHere(); - } - emit_d8(cbuf, parity_disp); - emit_cc(cbuf, $primary, $cop$$cmpcode); - int disp = l->loc_pos() - (cbuf.insts_size() + 1); - emit_d8(cbuf, disp); - assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); - assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp"); + ShouldNotReachHere(); + } %} ins_pipe(pipe_jcc); ins_short_branch(1); diff -r 6987871cfb9b -r 95134e034042 src/cpu/x86/vm/x86_64.ad --- a/src/cpu/x86/vm/x86_64.ad Wed Aug 10 14:06:57 2011 -0700 +++ b/src/cpu/x86/vm/x86_64.ad Thu Aug 11 12:08:11 2011 -0700 @@ -1966,7 +1966,12 @@ // // NOTE: If the platform does not provide any short branch variants, then // this method should return false for offset 0. -bool Matcher::is_short_branch_offset(int rule, int offset) { +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + // The passed offset is relative to address of the branch. + // On x86 a branch displacement is calculated relative to address + // of the next instruction.
+ offset -= br_size; + // the short version of jmpConUCF2 contains multiple branches, // making the reach slightly less if (rule == jmpConUCF2_rule) @@ -2426,22 +2431,6 @@ } %} - enc_class Lbl(label labl) - %{ - // GOTO - Label* l = $labl$$label; - emit_d32(cbuf, (l->loc_pos() - (cbuf.insts_size() + 4))); - %} - - enc_class LblShort(label labl) - %{ - // GOTO - Label* l = $labl$$label; - int disp = l->loc_pos() - (cbuf.insts_size() + 1); - assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); - emit_d8(cbuf, disp); - %} - enc_class opc2_reg(rRegI dst) %{ // BSWAP @@ -2460,25 +2449,6 @@ emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7); %} - enc_class Jcc(cmpOp cop, label labl) - %{ - // JCC - Label* l = $labl$$label; - $$$emit8$primary; - emit_cc(cbuf, $secondary, $cop$$cmpcode); - emit_d32(cbuf, (l->loc_pos() - (cbuf.insts_size() + 4))); - %} - - enc_class JccShort (cmpOp cop, label labl) - %{ - // JCC - Label *l = $labl$$label; - emit_cc(cbuf, $primary, $cop$$cmpcode); - int disp = l->loc_pos() - (cbuf.insts_size() + 1); - assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); - emit_d8(cbuf, disp); - %} - enc_class enc_cmov(cmpOp cop) %{ // CMOV @@ -12011,8 +11981,10 @@ ins_cost(300); format %{ "jmp $labl" %} size(5); - opcode(0xE9); - ins_encode(OpcP, Lbl(labl)); + ins_encode %{ + Label* L = $labl$$label; + __ jmp(*L, false); // Always long jump + %} ins_pipe(pipe_jmp); %} @@ -12025,8 +11997,10 @@ ins_cost(300); format %{ "j$cop $labl" %} size(6); - opcode(0x0F, 0x80); - ins_encode(Jcc(cop, labl)); + ins_encode %{ + Label* L = $labl$$label; + __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump + %} ins_pipe(pipe_jcc); %} @@ -12039,8 +12013,10 @@ ins_cost(300); format %{ "j$cop $labl\t# loop end" %} size(6); - opcode(0x0F, 0x80); - ins_encode(Jcc(cop, labl)); + ins_encode %{ + Label* L = $labl$$label; + __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump + %} 
ins_pipe(pipe_jcc); %} @@ -12052,8 +12028,10 @@ ins_cost(300); format %{ "j$cop,u $labl\t# loop end" %} size(6); - opcode(0x0F, 0x80); - ins_encode(Jcc(cop, labl)); + ins_encode %{ + Label* L = $labl$$label; + __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump + %} ins_pipe(pipe_jcc); %} @@ -12064,8 +12042,10 @@ ins_cost(200); format %{ "j$cop,u $labl\t# loop end" %} size(6); - opcode(0x0F, 0x80); - ins_encode(Jcc(cop, labl)); + ins_encode %{ + Label* L = $labl$$label; + __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump + %} ins_pipe(pipe_jcc); %} @@ -12077,8 +12057,10 @@ ins_cost(300); format %{ "j$cop,u $labl" %} size(6); - opcode(0x0F, 0x80); - ins_encode(Jcc(cop, labl)); + ins_encode %{ + Label* L = $labl$$label; + __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump + %} ins_pipe(pipe_jcc); %} @@ -12089,8 +12071,10 @@ ins_cost(200); format %{ "j$cop,u $labl" %} size(6); - opcode(0x0F, 0x80); - ins_encode(Jcc(cop, labl)); + ins_encode %{ + Label* L = $labl$$label; + __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump + %} ins_pipe(pipe_jcc); %} @@ -12109,26 +12093,19 @@ $$emit$$"done:" } %} - size(12); - opcode(0x0F, 0x80); ins_encode %{ Label* l = $labl$$label; - $$$emit8$primary; - emit_cc(cbuf, $secondary, Assembler::parity); - int parity_disp = -1; if ($cop$$cmpcode == Assembler::notEqual) { - // the two jumps 6 bytes apart so the jump distances are too - parity_disp = l->loc_pos() - (cbuf.insts_size() + 4); + __ jcc(Assembler::parity, *l, false); + __ jcc(Assembler::notEqual, *l, false); } else if ($cop$$cmpcode == Assembler::equal) { - parity_disp = 6; + Label done; + __ jccb(Assembler::parity, done); + __ jcc(Assembler::equal, *l, false); + __ bind(done); } else { ShouldNotReachHere(); } - emit_d32(cbuf, parity_disp); - $$$emit8$primary; - emit_cc(cbuf, $secondary, $cop$$cmpcode); - int disp = l->loc_pos() - (cbuf.insts_size() + 4); - emit_d32(cbuf, 
disp); %} ins_pipe(pipe_jcc); %} @@ -12204,8 +12181,10 @@ ins_cost(300); format %{ "jmp,s $labl" %} size(2); - opcode(0xEB); - ins_encode(OpcP, LblShort(labl)); + ins_encode %{ + Label* L = $labl$$label; + __ jmpb(*L); + %} ins_pipe(pipe_jmp); ins_short_branch(1); %} @@ -12218,8 +12197,10 @@ ins_cost(300); format %{ "j$cop,s $labl" %} size(2); - opcode(0x70); - ins_encode(JccShort(cop, labl)); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} ins_pipe(pipe_jcc); ins_short_branch(1); %} @@ -12232,8 +12213,10 @@ ins_cost(300); format %{ "j$cop,s $labl\t# loop end" %} size(2); - opcode(0x70); - ins_encode(JccShort(cop, labl)); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} ins_pipe(pipe_jcc); ins_short_branch(1); %} @@ -12246,8 +12229,10 @@ ins_cost(300); format %{ "j$cop,us $labl\t# loop end" %} size(2); - opcode(0x70); - ins_encode(JccShort(cop, labl)); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} ins_pipe(pipe_jcc); ins_short_branch(1); %} @@ -12259,8 +12244,10 @@ ins_cost(300); format %{ "j$cop,us $labl\t# loop end" %} size(2); - opcode(0x70); - ins_encode(JccShort(cop, labl)); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} ins_pipe(pipe_jcc); ins_short_branch(1); %} @@ -12273,8 +12260,10 @@ ins_cost(300); format %{ "j$cop,us $labl" %} size(2); - opcode(0x70); - ins_encode(JccShort(cop, labl)); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} ins_pipe(pipe_jcc); ins_short_branch(1); %} @@ -12286,8 +12275,10 @@ ins_cost(300); format %{ "j$cop,us $labl" %} size(2); - opcode(0x70); - ins_encode(JccShort(cop, labl)); + ins_encode %{ + Label* L = $labl$$label; + __ jccb((Assembler::Condition)($cop$$cmpcode), *L); + %} ins_pipe(pipe_jcc); ins_short_branch(1); %} @@ -12308,24 +12299,19 @@ } %} size(4); - 
opcode(0x70); ins_encode %{ Label* l = $labl$$label; - emit_cc(cbuf, $primary, Assembler::parity); - int parity_disp = -1; if ($cop$$cmpcode == Assembler::notEqual) { - parity_disp = l->loc_pos() - (cbuf.insts_size() + 1); + __ jccb(Assembler::parity, *l); + __ jccb(Assembler::notEqual, *l); } else if ($cop$$cmpcode == Assembler::equal) { - parity_disp = 2; + Label done; + __ jccb(Assembler::parity, done); + __ jccb(Assembler::equal, *l); + __ bind(done); } else { - ShouldNotReachHere(); - } - emit_d8(cbuf, parity_disp); - emit_cc(cbuf, $primary, $cop$$cmpcode); - int disp = l->loc_pos() - (cbuf.insts_size() + 1); - emit_d8(cbuf, disp); - assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); - assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp"); + ShouldNotReachHere(); + } %} ins_pipe(pipe_jcc); ins_short_branch(1); diff -r 6987871cfb9b -r 95134e034042 src/os_cpu/linux_x86/vm/linux_x86_32.ad --- a/src/os_cpu/linux_x86/vm/linux_x86_32.ad Wed Aug 10 14:06:57 2011 -0700 +++ b/src/os_cpu/linux_x86/vm/linux_x86_32.ad Thu Aug 11 12:08:11 2011 -0700 @@ -154,7 +154,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { - return 5; + return MachNode::size(ra_); } %} diff -r 6987871cfb9b -r 95134e034042 src/os_cpu/linux_x86/vm/linux_x86_64.ad --- a/src/os_cpu/linux_x86/vm/linux_x86_64.ad Wed Aug 10 14:06:57 2011 -0700 +++ b/src/os_cpu/linux_x86/vm/linux_x86_64.ad Thu Aug 11 12:08:11 2011 -0700 @@ -167,7 +167,8 @@ } uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { - return 5; + // distance could be far and requires load and call through register + return MachNode::size(ra_); } %} diff -r 6987871cfb9b -r 95134e034042 src/os_cpu/solaris_x86/vm/solaris_x86_32.ad --- a/src/os_cpu/solaris_x86/vm/solaris_x86_32.ad Wed Aug 10 14:06:57 2011 -0700 +++ b/src/os_cpu/solaris_x86/vm/solaris_x86_32.ad Thu Aug 11 12:08:11 2011 -0700 @@ -161,7 +161,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { - 
return 5; + return MachNode::size(ra_); } %} diff -r 6987871cfb9b -r 95134e034042 src/os_cpu/solaris_x86/vm/solaris_x86_64.ad --- a/src/os_cpu/solaris_x86/vm/solaris_x86_64.ad Wed Aug 10 14:06:57 2011 -0700 +++ b/src/os_cpu/solaris_x86/vm/solaris_x86_64.ad Thu Aug 11 12:08:11 2011 -0700 @@ -180,7 +180,8 @@ uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { - return 5; + // distance could be far and requires load and call through register + return MachNode::size(ra_); } %} diff -r 6987871cfb9b -r 95134e034042 src/share/vm/adlc/formssel.cpp --- a/src/share/vm/adlc/formssel.cpp Wed Aug 10 14:06:57 2011 -0700 +++ b/src/share/vm/adlc/formssel.cpp Thu Aug 11 12:08:11 2011 -0700 @@ -1181,6 +1181,34 @@ strcmp(reduce_result(), short_branch->reduce_result()) == 0 && _matrule->equivalent(AD.globalNames(), short_branch->_matrule)) { // The instructions are equivalent. + + // Now verify that both instructions have the same parameters and + // the same effects. Both branch forms should have the same inputs + // and resulting projections to correctly replace a long branch node + // with corresponding short branch node during code generation. 
+ + bool different = false; + if (short_branch->_components.count() != _components.count()) { + different = true; + } else if (_components.count() > 0) { + short_branch->_components.reset(); + _components.reset(); + Component *comp; + while ((comp = _components.iter()) != NULL) { + Component *short_comp = short_branch->_components.iter(); + if (short_comp == NULL || + short_comp->_type != comp->_type || + short_comp->_usedef != comp->_usedef) { + different = true; + break; + } + } + if (short_branch->_components.iter() != NULL) + different = true; + } + if (different) { + globalAD->syntax_err(short_branch->_linenum, "Instruction %s and its short form %s have different parameters\n", _ident, short_branch->_ident); + } if (AD._short_branch_debug) { fprintf(stderr, "Instruction %s has short form %s\n", _ident, short_branch->_ident); } diff -r 6987871cfb9b -r 95134e034042 src/share/vm/adlc/output_h.cpp --- a/src/share/vm/adlc/output_h.cpp Wed Aug 10 14:06:57 2011 -0700 +++ b/src/share/vm/adlc/output_h.cpp Thu Aug 11 12:08:11 2011 -0700 @@ -1536,12 +1536,16 @@ // Each instruction attribute results in a virtual call of same name. // The ins_cost is not handled here. Attribute *attr = instr->_attribs; + bool avoid_back_to_back = false; while (attr != NULL) { if (strcmp(attr->_ident,"ins_cost") && strcmp(attr->_ident,"ins_short_branch")) { fprintf(fp," int %s() const { return %s; }\n", attr->_ident, attr->_val); } + // Check value for ins_avoid_back_to_back, and if it is true (1), set the flag + if (!strcmp(attr->_ident,"ins_avoid_back_to_back") && attr->int_val(*this) != 0) + avoid_back_to_back = true; attr = (Attribute *)attr->_next; } @@ -1704,6 +1708,16 @@ } } + // flag: if this instruction should not be generated back to back. 
+ if ( avoid_back_to_back ) { + if ( node_flags_set ) { + fprintf(fp," | Flag_avoid_back_to_back"); + } else { + fprintf(fp,"init_flags(Flag_avoid_back_to_back"); + node_flags_set = true; + } + } + // Check if machine instructions that USE memory, but do not DEF memory, // depend upon a node that defines memory in machine-independent graph. if ( instr->needs_anti_dependence_check(_globalNames) ) { diff -r 6987871cfb9b -r 95134e034042 src/share/vm/opto/block.cpp --- a/src/share/vm/opto/block.cpp Wed Aug 10 14:06:57 2011 -0700 +++ b/src/share/vm/opto/block.cpp Thu Aug 11 12:08:11 2011 -0700 @@ -80,35 +80,37 @@ uint Block::code_alignment() { // Check for Root block - if( _pre_order == 0 ) return CodeEntryAlignment; + if (_pre_order == 0) return CodeEntryAlignment; // Check for Start block - if( _pre_order == 1 ) return InteriorEntryAlignment; + if (_pre_order == 1) return InteriorEntryAlignment; // Check for loop alignment - if (has_loop_alignment()) return loop_alignment(); + if (has_loop_alignment()) return loop_alignment(); - return 1; // no particular alignment + return relocInfo::addr_unit(); // no particular alignment } uint Block::compute_loop_alignment() { Node *h = head(); - if( h->is_Loop() && h->as_Loop()->is_inner_loop() ) { + int unit_sz = relocInfo::addr_unit(); + if (h->is_Loop() && h->as_Loop()->is_inner_loop()) { // Pre- and post-loops have low trip count so do not bother with // NOPs for align loop head. The constants are hidden from tuning // but only because my "divide by 4" heuristic surely gets nearly // all possible gain (a "do not align at all" heuristic has a // chance of getting a really tiny gain). - if( h->is_CountedLoop() && (h->as_CountedLoop()->is_pre_loop() || - h->as_CountedLoop()->is_post_loop()) ) - return (OptoLoopAlignment > 4) ? (OptoLoopAlignment>>2) : 1; + if (h->is_CountedLoop() && (h->as_CountedLoop()->is_pre_loop() || + h->as_CountedLoop()->is_post_loop())) { + return (OptoLoopAlignment > 4*unit_sz) ? 
(OptoLoopAlignment>>2) : unit_sz; + } // Loops with low backedge frequency should not be aligned. Node *n = h->in(LoopNode::LoopBackControl)->in(0); - if( n->is_MachIf() && n->as_MachIf()->_prob < 0.01 ) { - return 1; // Loop does not loop, more often than not! + if (n->is_MachIf() && n->as_MachIf()->_prob < 0.01) { + return unit_sz; // Loop does not loop, more often than not! } return OptoLoopAlignment; // Otherwise align loop head } - return 1; // no particular alignment + return unit_sz; // no particular alignment } //----------------------------------------------------------------------------- @@ -271,55 +273,55 @@ //------------------------------dump------------------------------------------- #ifndef PRODUCT -void Block::dump_bidx(const Block* orig) const { - if (_pre_order) tty->print("B%d",_pre_order); - else tty->print("N%d", head()->_idx); +void Block::dump_bidx(const Block* orig, outputStream* st) const { + if (_pre_order) st->print("B%d",_pre_order); + else st->print("N%d", head()->_idx); if (Verbose && orig != this) { // Dump the original block's idx - tty->print(" ("); - orig->dump_bidx(orig); - tty->print(")"); + st->print(" ("); + orig->dump_bidx(orig, st); + st->print(")"); } } -void Block::dump_pred(const Block_Array *bbs, Block* orig) const { +void Block::dump_pred(const Block_Array *bbs, Block* orig, outputStream* st) const { if (is_connector()) { for (uint i=1; i_idx]); - p->dump_pred(bbs, orig); + p->dump_pred(bbs, orig, st); } } else { - dump_bidx(orig); - tty->print(" "); + dump_bidx(orig, st); + st->print(" "); } } -void Block::dump_head( const Block_Array *bbs ) const { +void Block::dump_head( const Block_Array *bbs, outputStream* st ) const { // Print the basic block - dump_bidx(this); - tty->print(": #\t"); + dump_bidx(this, st); + st->print(": #\t"); // Print the incoming CFG edges and the outgoing CFG edges for( uint i=0; i<_num_succs; i++ ) { - non_connector_successor(i)->dump_bidx(_succs[i]); - tty->print(" "); + 
non_connector_successor(i)->dump_bidx(_succs[i], st); + st->print(" "); } - tty->print("<- "); + st->print("<- "); if( head()->is_block_start() ) { for (uint i=1; i_idx]; - p->dump_pred(bbs, p); + p->dump_pred(bbs, p, st); } else { while (!s->is_block_start()) s = s->in(0); - tty->print("N%d ", s->_idx ); + st->print("N%d ", s->_idx ); } } } else - tty->print("BLOCK HEAD IS JUNK "); + st->print("BLOCK HEAD IS JUNK "); // Print loop, if any const Block *bhead = this; // Head of self-loop @@ -330,24 +332,24 @@ while (bx->is_connector()) { bx = (*bbs)[bx->pred(1)->_idx]; } - tty->print("\tLoop: B%d-B%d ", bhead->_pre_order, bx->_pre_order); + st->print("\tLoop: B%d-B%d ", bhead->_pre_order, bx->_pre_order); // Dump any loop-specific bits, especially for CountedLoops. - loop->dump_spec(tty); + loop->dump_spec(st); } else if (has_loop_alignment()) { - tty->print(" top-of-loop"); + st->print(" top-of-loop"); } - tty->print(" Freq: %g",_freq); + st->print(" Freq: %g",_freq); if( Verbose || WizardMode ) { - tty->print(" IDom: %d/#%d", _idom ? _idom->_pre_order : 0, _dom_depth); - tty->print(" RegPressure: %d",_reg_pressure); - tty->print(" IHRP Index: %d",_ihrp_index); - tty->print(" FRegPressure: %d",_freg_pressure); - tty->print(" FHRP Index: %d",_fhrp_index); + st->print(" IDom: %d/#%d", _idom ? 
_idom->_pre_order : 0, _dom_depth); + st->print(" RegPressure: %d",_reg_pressure); + st->print(" IHRP Index: %d",_ihrp_index); + st->print(" FRegPressure: %d",_freg_pressure); + st->print(" FHRP Index: %d",_fhrp_index); } - tty->print_cr(""); + st->print_cr(""); } -void Block::dump() const { dump(0); } +void Block::dump() const { dump(NULL); } void Block::dump( const Block_Array *bbs ) const { dump_head(bbs); @@ -441,9 +443,9 @@ Block *bb = new (_bbs._arena) Block(_bbs._arena,p); _bbs.map(p->_idx,bb); _bbs.map(x->_idx,bb); - if( x != p ) // Only for root is x == p + if( x != p ) { // Only for root is x == p bb->_nodes.push((Node*)x); - + } // Now handle predecessors ++sum; // Count 1 for self block uint cnt = bb->num_preds(); diff -r 6987871cfb9b -r 95134e034042 src/share/vm/opto/block.hpp --- a/src/share/vm/opto/block.hpp Wed Aug 10 14:06:57 2011 -0700 +++ b/src/share/vm/opto/block.hpp Thu Aug 11 12:08:11 2011 -0700 @@ -329,10 +329,10 @@ #ifndef PRODUCT // Debugging print of basic block - void dump_bidx(const Block* orig) const; - void dump_pred(const Block_Array *bbs, Block* orig) const; - void dump_head( const Block_Array *bbs ) const; - void dump( ) const; + void dump_bidx(const Block* orig, outputStream* st = tty) const; + void dump_pred(const Block_Array *bbs, Block* orig, outputStream* st = tty) const; + void dump_head( const Block_Array *bbs, outputStream* st = tty ) const; + void dump() const; void dump( const Block_Array *bbs ) const; #endif }; diff -r 6987871cfb9b -r 95134e034042 src/share/vm/opto/compile.hpp --- a/src/share/vm/opto/compile.hpp Wed Aug 10 14:06:57 2011 -0700 +++ b/src/share/vm/opto/compile.hpp Thu Aug 11 12:08:11 2011 -0700 @@ -785,11 +785,17 @@ // Process an OopMap Element while emitting nodes void Process_OopMap_Node(MachNode *mach, int code_offset); + // Initialize code buffer + CodeBuffer* init_buffer(uint* blk_starts); + // Write out basic block data to code buffer - void Fill_buffer(); + void fill_buffer(CodeBuffer* cb, uint* 
blk_starts); // Determine which variable sized branches can be shortened - void Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size); + void shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size); + + // Inserts nops where needed and final shorten branches. + void finalize_offsets_and_shorten(uint* blk_starts); // Compute the size of first NumberOfLoopInstrToAlign instructions // at the head of a loop. diff -r 6987871cfb9b -r 95134e034042 src/share/vm/opto/machnode.hpp --- a/src/share/vm/opto/machnode.hpp Wed Aug 10 14:06:57 2011 -0700 +++ b/src/share/vm/opto/machnode.hpp Thu Aug 11 12:08:11 2011 -0700 @@ -188,6 +188,9 @@ virtual MachNode *short_branch_version(Compile* C) { return NULL; } bool may_be_short_branch() const { return (flags() & Flag_may_be_short_branch) != 0; } + // Avoid back to back some instructions on some CPUs. + bool avoid_back_to_back() const { return (flags() & Flag_avoid_back_to_back) != 0; } + // First index in _in[] corresponding to operand, or -1 if there is none int operand_index(uint operand) const; diff -r 6987871cfb9b -r 95134e034042 src/share/vm/opto/matcher.hpp --- a/src/share/vm/opto/matcher.hpp Wed Aug 10 14:06:57 2011 -0700 +++ b/src/share/vm/opto/matcher.hpp Thu Aug 11 12:08:11 2011 -0700 @@ -351,7 +351,7 @@ virtual int regnum_to_fpu_offset(int regnum); // Is this branch offset small enough to be addressed by a short branch? - bool is_short_branch_offset(int rule, int offset); + bool is_short_branch_offset(int rule, int br_size, int offset); // Optional scaling for the parameter to the ClearArray/CopyArray node. 
static const bool init_array_count_is_in_bytes; diff -r 6987871cfb9b -r 95134e034042 src/share/vm/opto/node.hpp --- a/src/share/vm/opto/node.hpp Wed Aug 10 14:06:57 2011 -0700 +++ b/src/share/vm/opto/node.hpp Thu Aug 11 12:08:11 2011 -0700 @@ -637,7 +637,8 @@ Flag_is_Branch = Flag_is_cisc_alternate << 1, Flag_is_dead_loop_safe = Flag_is_Branch << 1, Flag_may_be_short_branch = Flag_is_dead_loop_safe << 1, - _max_flags = (Flag_may_be_short_branch << 1) - 1 // allow flags combination + Flag_avoid_back_to_back = Flag_may_be_short_branch << 1, + _max_flags = (Flag_avoid_back_to_back << 1) - 1 // allow flags combination }; private: diff -r 6987871cfb9b -r 95134e034042 src/share/vm/opto/output.cpp --- a/src/share/vm/opto/output.cpp Wed Aug 10 14:06:57 2011 -0700 +++ b/src/share/vm/opto/output.cpp Thu Aug 11 12:08:11 2011 -0700 @@ -128,6 +128,14 @@ if ( ZapDeadCompiledLocals ) Insert_zap_nodes(); # endif + uint* blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1); + blk_starts[0] = 0; + + // Initialize code buffer and process short branches. + CodeBuffer* cb = init_buffer(blk_starts); + + if (cb == NULL || failing()) return; + ScheduleAndBundle(); #ifndef PRODUCT @@ -148,11 +156,13 @@ if (failing()) return; + finalize_offsets_and_shorten(blk_starts); + BuildOopMaps(); if (failing()) return; - Fill_buffer(); + fill_buffer(cb, blk_starts); } bool Compile::need_stack_bang(int frame_size_in_bytes) const { @@ -325,22 +335,22 @@ } // if( MaxLoopPad < OptoLoopAlignment-1 ) } -//----------------------Shorten_branches--------------------------------------- +//----------------------shorten_branches--------------------------------------- // The architecture description provides short branch variants for some long // branch instructions. Replace eligible long branches with short branches. 
-void Compile::Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size) { - - // fill in the nop array for bundling computations - MachNode *_nop_list[Bundle::_nop_count]; - Bundle::initialize_nops(_nop_list, this); +void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) { // ------------------ // Compute size of each block, method size, and relocation information size - uint *jmp_end = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); - uint *blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1); - DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); ) - DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); ) - blk_starts[0] = 0; + uint nblocks = _cfg->_num_blocks; + + uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks); + uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks); + int* jmp_nidx = NEW_RESOURCE_ARRAY(int ,nblocks); + DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks); ) + DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks); ) + + bool has_short_branch_candidate = false; // Initialize the sizes to 0 code_size = 0; // Size in bytes of generated code @@ -350,28 +360,35 @@ reloc_size = 1; // Number of relocation entries // Make three passes. The first computes pessimistic blk_starts, - // relative jmp_end and reloc_size information. The second performs + // relative jmp_offset and reloc_size information. The second performs // short branch substitution using the pessimistic sizing. The // third inserts nops where needed. - Node *nj; // tmp - // Step one, perform a pessimistic sizing pass. 
- uint i; - uint min_offset_from_last_call = 1; // init to a positive value + uint last_call_adr = max_uint; + uint last_avoid_back_to_back_adr = max_uint; uint nop_size = (new (this) MachNopNode())->size(_regalloc); - for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks + for (uint i = 0; i < nblocks; i++) { // For all blocks Block *b = _cfg->_blocks[i]; + // During short branch replacement, we store the relative (to blk_starts) + // offset of jump in jmp_offset, rather than the absolute offset of jump. + // This is so that we do not need to recompute sizes of all nodes when + // we compute correct blk_starts in our next sizing pass. + jmp_offset[i] = 0; + jmp_size[i] = 0; + jmp_nidx[i] = -1; + DEBUG_ONLY( jmp_target[i] = 0; ) + DEBUG_ONLY( jmp_rule[i] = 0; ) + // Sum all instruction sizes to compute block size uint last_inst = b->_nodes.size(); uint blk_size = 0; - for( uint j = 0; j_nodes[j]; + for (uint j = 0; j < last_inst; j++) { + Node* nj = b->_nodes[j]; uint inst_size = nj->size(_regalloc); - blk_size += inst_size; // Handle machine instruction nodes - if( nj->is_Mach() ) { + if (nj->is_Mach()) { MachNode *mach = nj->as_Mach(); blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding reloc_size += mach->reloc(); @@ -388,32 +405,52 @@ } else if (mach->is_MachSafePoint()) { // If call/safepoint are adjacent, account for possible // nop to disambiguate the two safepoints. - if (min_offset_from_last_call == 0) { + // ScheduleAndBundle() can rearrange nodes in a block, + // check for all offsets inside this block. + if (last_call_adr >= blk_starts[i]) { + blk_size += nop_size; + } + } + if (mach->avoid_back_to_back()) { + // Nop is inserted between "avoid back to back" instructions. + // ScheduleAndBundle() can rearrange nodes in a block, + // check for all offsets inside this block. 
+ if (last_avoid_back_to_back_adr >= blk_starts[i]) { blk_size += nop_size; } } + if (mach->may_be_short_branch()) { + if (!nj->is_Branch()) { +#ifndef PRODUCT + nj->dump(3); +#endif + Unimplemented(); + } + assert(jmp_nidx[i] == -1, "block should have only one branch"); + jmp_offset[i] = blk_size; + jmp_size[i] = inst_size; + jmp_nidx[i] = j; + has_short_branch_candidate = true; + } } - min_offset_from_last_call += inst_size; + blk_size += inst_size; // Remember end of call offset if (nj->is_MachCall() && !nj->is_MachCallLeaf()) { - min_offset_from_last_call = 0; + last_call_adr = blk_starts[i]+blk_size; + } + // Remember end of avoid_back_to_back offset + if (nj->is_Mach() && nj->as_Mach()->avoid_back_to_back()) { + last_avoid_back_to_back_adr = blk_starts[i]+blk_size; } } - // During short branch replacement, we store the relative (to blk_starts) - // end of jump in jmp_end, rather than the absolute end of jump. This - // is so that we do not need to recompute sizes of all nodes when we compute - // correct blk_starts in our next sizing pass. - jmp_end[i] = blk_size; - DEBUG_ONLY( jmp_target[i] = 0; ) - // When the next block starts a loop, we may insert pad NOP // instructions. Since we cannot know our future alignment, // assume the worst. - if( i<_cfg->_num_blocks-1 ) { + if (i< nblocks-1) { Block *nb = _cfg->_blocks[i+1]; int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit(); - if( max_loop_pad > 0 ) { + if (max_loop_pad > 0) { assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), ""); blk_size += max_loop_pad; } @@ -424,124 +461,100 @@ } // Step two, replace eligible long jumps. - - // Note: this will only get the long branches within short branch - // range. Another pass might detect more branches that became - // candidates because the shortening in the first pass exposed - // more opportunities. 
Unfortunately, this would require - // recomputing the starting and ending positions for the blocks - for( i=0; i<_cfg->_num_blocks; i++ ) { - Block *b = _cfg->_blocks[i]; - - int j; - // Find the branch; ignore trailing NOPs. - for( j = b->_nodes.size()-1; j>=0; j-- ) { - nj = b->_nodes[j]; - if( !nj->is_Mach() || nj->as_Mach()->ideal_Opcode() != Op_Con ) - break; - } - - if (j >= 0) { - if( nj->is_Mach() && nj->as_Mach()->may_be_short_branch() ) { - MachNode *mach = nj->as_Mach(); + bool progress = true; + uint last_may_be_short_branch_adr = max_uint; + while (has_short_branch_candidate && progress) { + progress = false; + has_short_branch_candidate = false; + int adjust_block_start = 0; + for (uint i = 0; i < nblocks; i++) { + Block *b = _cfg->_blocks[i]; + int idx = jmp_nidx[i]; + MachNode* mach = (idx == -1) ? NULL: b->_nodes[idx]->as_Mach(); + if (mach != NULL && mach->may_be_short_branch()) { +#ifdef ASSERT + assert(jmp_size[i] > 0 && mach->is_Branch(), "sanity"); + int j; + // Find the branch; ignore trailing NOPs. + for (j = b->_nodes.size()-1; j>=0; j--) { + Node* n = b->_nodes[j]; + if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) + break; + } + assert(j >= 0 && j == idx && b->_nodes[j] == (Node*)mach, "sanity"); +#endif + int br_size = jmp_size[i]; + int br_offs = blk_starts[i] + jmp_offset[i]; + // This requires the TRUE branch target be in succs[0] uint bnum = b->non_connector_successor(0)->_pre_order; - uintptr_t target = blk_starts[bnum]; - if( mach->is_Branch() ) { - int offset = target-(blk_starts[i] + jmp_end[i]); - if (_matcher->is_short_branch_offset(mach->rule(), offset)) { - // We've got a winner. Replace this branch. - MachNode* replacement = mach->short_branch_version(this); - b->_nodes.map(j, replacement); - mach->subsume_by(replacement); - - // Update the jmp_end size to save time in our - // next pass. 
- jmp_end[i] -= (mach->size(_regalloc) - replacement->size(_regalloc)); - DEBUG_ONLY( jmp_target[i] = bnum; ); - DEBUG_ONLY( jmp_rule[i] = mach->rule(); ); + int offset = blk_starts[bnum] - br_offs; + if (bnum > i) { // adjust following block's offset + offset -= adjust_block_start; + } + // In the following code a nop could be inserted before + // the branch which will increase the backward distance. + bool needs_padding = ((uint)br_offs == last_may_be_short_branch_adr); + if (needs_padding && offset <= 0) + offset -= nop_size; + + if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) { + // We've got a winner. Replace this branch. + MachNode* replacement = mach->short_branch_version(this); + + // Update the jmp_size. + int new_size = replacement->size(_regalloc); + int diff = br_size - new_size; + assert(diff >= (int)nop_size, "short_branch size should be smaller"); + // Conservatively take into account padding between + // avoid_back_to_back branches. Previous branch could be + // converted into avoid_back_to_back branch during next + // rounds. + if (needs_padding && replacement->avoid_back_to_back()) { + jmp_offset[i] += nop_size; + diff -= nop_size; } + adjust_block_start += diff; + b->_nodes.map(idx, replacement); + mach->subsume_by(replacement); + mach = replacement; + progress = true; + + jmp_size[i] = new_size; + DEBUG_ONLY( jmp_target[i] = bnum; ); + DEBUG_ONLY( jmp_rule[i] = mach->rule(); ); } else { -#ifndef PRODUCT - mach->dump(3); -#endif - Unimplemented(); + // The jump distance is not short, try again during next iteration. + has_short_branch_candidate = true; } + } // (mach->may_be_short_branch()) + if (mach != NULL && (mach->may_be_short_branch() || + mach->avoid_back_to_back())) { + last_may_be_short_branch_adr = blk_starts[i] + jmp_offset[i] + jmp_size[i]; } - } - } - - // Compute the size of first NumberOfLoopInstrToAlign instructions at head - // of a loop. It is used to determine the padding for loop alignment.
- compute_loop_first_inst_sizes(); - - // Step 3, compute the offsets of all the labels - uint last_call_adr = max_uint; - for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks - // copy the offset of the beginning to the corresponding label - assert(labels[i].is_unused(), "cannot patch at this point"); - labels[i].bind_loc(blk_starts[i], CodeBuffer::SECT_INSTS); - - // insert padding for any instructions that need it - Block *b = _cfg->_blocks[i]; - uint last_inst = b->_nodes.size(); - uint adr = blk_starts[i]; - for( uint j = 0; j_nodes[j]; - if( nj->is_Mach() ) { - int padding = nj->as_Mach()->compute_padding(adr); - // If call/safepoint are adjacent insert a nop (5010568) - if (padding == 0 && nj->is_MachSafePoint() && !nj->is_MachCall() && - adr == last_call_adr ) { - padding = nop_size; - } - if(padding > 0) { - assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); - int nops_cnt = padding / nop_size; - MachNode *nop = new (this) MachNopNode(nops_cnt); - b->_nodes.insert(j++, nop); - _cfg->_bbs.map( nop->_idx, b ); - adr += padding; - last_inst++; - } - } - adr += nj->size(_regalloc); - - // Remember end of call offset - if (nj->is_MachCall() && !nj->is_MachCallLeaf()) { - last_call_adr = adr; - } - } - - if ( i != _cfg->_num_blocks-1) { - // Get the size of the block - uint blk_size = adr - blk_starts[i]; - - // When the next block is the top of a loop, we may insert pad NOP - // instructions. 
- Block *nb = _cfg->_blocks[i+1]; - int current_offset = blk_starts[i] + blk_size; - current_offset += nb->alignment_padding(current_offset); - // Save block size; update total method size - blk_starts[i+1] = current_offset; + blk_starts[i+1] -= adjust_block_start; } } #ifdef ASSERT - for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks - if( jmp_target[i] != 0 ) { - int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_end[i]); - if (!_matcher->is_short_branch_offset(jmp_rule[i], offset)) { - tty->print_cr("target (%d) - jmp_end(%d) = offset (%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_end[i], offset, i, jmp_target[i]); + for (uint i = 0; i < nblocks; i++) { // For all blocks + if (jmp_target[i] != 0) { + int br_size = jmp_size[i]; + int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]); + if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) { + tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]); } - assert(_matcher->is_short_branch_offset(jmp_rule[i], offset), "Displacement too large for short jmp"); + assert(_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset), "Displacement too large for short jmp"); } } #endif + // Step 3, compute the offsets of all blocks, will be done in finalize_offsets_and_shorten() + // after ScheduleAndBundle(). + // ------------------ // Compute size for code buffer - code_size = blk_starts[i-1] + jmp_end[i-1]; + code_size = blk_starts[nblocks]; // Relocation records reloc_size += 1; // Relo entry for exception handler @@ -550,7 +563,189 @@ // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for // a relocation index. // The CodeBuffer will expand the locs array if this estimate is too low. 
- reloc_size *= 10 / sizeof(relocInfo); + reloc_size *= 10 / sizeof(relocInfo); +} + +//----------------------finalize_offsets_and_shorten------------------------- +void Compile::finalize_offsets_and_shorten(uint* blk_starts) { + // blk_starts[] contains offsets calculated during short branches processing, + // offsets should not be increased during following steps. + + // Compute the size of first NumberOfLoopInstrToAlign instructions at head + // of a loop. It is used to determine the padding for loop alignment. + compute_loop_first_inst_sizes(); + + uint nblocks = _cfg->_num_blocks; +#ifdef ASSERT + uint* jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks); + uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks); + uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks); + uint* jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks); +#endif + + // Inserts nops where needed and do final short branches replacement. + uint nop_size = (new (this) MachNopNode())->size(_regalloc); + uint last_call_adr = max_uint; + uint last_avoid_back_to_back_adr = max_uint; + + assert(blk_starts[0] == 0, "sanity"); + uint current_offset = 0; + uint block_alignment_padding = 0; + + for (uint i=0; i < nblocks; i++) { // For all blocks + Block *b = _cfg->_blocks[i]; + +#ifdef ASSERT + jmp_target[i] = 0; + jmp_offset[i] = 0; + jmp_size[i] = 0; + jmp_rule[i] = 0; +#endif + + // Maximum alignment was added before loop block during + // Step One, as result padding for nodes was not added. + // Take this into account for block's size change check + // and allow increase block's size by the difference + // of maximum and actual alignment paddings. 
+ DEBUG_ONLY( uint orig_blk_size = blk_starts[i+1] - blk_starts[i] + block_alignment_padding; ) + uint blk_offset = current_offset; + + uint last_inst = b->_nodes.size(); + for (uint j = 0; j_nodes[j]; + + if (valid_bundle_info(nj) && + node_bundling(nj)->used_in_unconditional_delay()) { + continue; // Skip instruction in delay slot + } + + uint inst_size = nj->size(_regalloc); + if (nj->is_Mach()) { + MachNode *mach = nj->as_Mach(); + int padding = mach->compute_padding(current_offset); + + // If call/safepoint are adjacent insert a nop (5010568) + if (padding == 0 && nj->is_MachSafePoint() && !nj->is_MachCall() && + current_offset == last_call_adr) { + padding = nop_size; + } + + // Inserted a nop between "avoid back to back" instructions. + if (padding == 0 && mach->avoid_back_to_back() && + current_offset == last_avoid_back_to_back_adr) { + padding = nop_size; + } + + if (padding > 0) { + assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); + int nops_cnt = padding / nop_size; + MachNode *nop = new (this) MachNopNode(nops_cnt); + b->_nodes.insert(j++, nop); + _cfg->_bbs.map(nop->_idx, b); + last_inst++; + current_offset += padding; + } + + // Try to replace long branch if delay slot is not used, + // it is mostly for back branches since forward branch's + // distance is not updated yet. + bool delay_slot_is_used = valid_bundle_info(nj) && + node_bundling(nj)->use_unconditional_delay(); + if (!delay_slot_is_used && mach->may_be_short_branch()) { + int br_size = inst_size; + + // This requires the TRUE branch target be in succs[0] + uint bnum = b->non_connector_successor(0)->_pre_order; + int offset = blk_starts[bnum] - current_offset; + if (bnum >= i) { + // Current and following block's offset are not + // finilized yet, adjust distance. + offset -= (blk_starts[i] - blk_offset); + } + // In the following code a nop could be inserted before + // the branch which will increase the backward distance. 
+ bool needs_padding = (current_offset == last_avoid_back_to_back_adr); + if (needs_padding && offset <= 0) + offset -= nop_size; + + if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) { + // We've got a winner. Replace this branch. + MachNode* replacement = mach->short_branch_version(this); + + // Update the jmp_size. + int new_size = replacement->size(_regalloc); + assert((br_size - new_size) >= (int)nop_size, "short_branch size should be smaller"); + // Conservatively take into accound padding between + // avoid_back_to_back branches. Previous branch could be + // converted into avoid_back_to_back branch during next + // rounds. + if (needs_padding && replacement->avoid_back_to_back()) { + MachNode *nop = new (this) MachNopNode(); + b->_nodes.insert(j++, nop); + _cfg->_bbs.map(nop->_idx, b); + last_inst++; + current_offset += nop_size; + } + inst_size = new_size; + b->_nodes.map(j, replacement); + mach->subsume_by(replacement); + nj = replacement; +#ifdef ASSERT + jmp_target[i] = bnum; + jmp_offset[i] = current_offset - blk_offset; + jmp_size[i] = new_size; + jmp_rule[i] = mach->rule(); +#endif + } + } + } + current_offset += inst_size; + + // Remember end of call offset + if (nj->is_MachCall() && !nj->is_MachCallLeaf()) { + last_call_adr = current_offset; + } + // Remember end of avoid_back_to_back offset + if (nj->is_Mach() && nj->as_Mach()->avoid_back_to_back()) { + last_avoid_back_to_back_adr = current_offset; + } + } + assert(blk_offset <= blk_starts[i], "shouldn't increase distance"); + blk_starts[i] = blk_offset; + + // When the next block is the top of a loop, we may insert pad NOP + // instructions. 
+ if (i < nblocks-1) { + Block *nb = _cfg->_blocks[i+1]; + int padding = nb->alignment_padding(current_offset); + if (padding > 0) { + assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); + int nops_cnt = padding / nop_size; + MachNode *nop = new (this) MachNopNode(nops_cnt); + b->_nodes.insert(b->_nodes.size(), nop); + _cfg->_bbs.map(nop->_idx, b); + current_offset += padding; + } + int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit(); + assert(max_loop_pad >= padding, "sanity"); + block_alignment_padding = max_loop_pad - padding; + } + assert(orig_blk_size >= (current_offset - blk_offset), "shouldn't increase block size"); + } + blk_starts[nblocks] = current_offset; + +#ifdef ASSERT + for (uint i = 0; i < nblocks; i++) { // For all blocks + if (jmp_target[i] != 0) { + int br_size = jmp_size[i]; + int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]); + if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) { + tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]); + } + assert(_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset), "Displacement too large for short jmp"); + } + } +#endif } //------------------------------FillLocArray----------------------------------- @@ -1026,7 +1221,7 @@ -// helper for Fill_buffer bailout logic +// helper for fill_buffer bailout logic static void turn_off_compiler(Compile* C) { if (CodeCache::largest_free_block() >= CodeCacheMinimumFreeSpace*10) { // Do not turn off compilation if a single giant method has @@ -1039,22 +1234,20 @@ } -//------------------------------Fill_buffer------------------------------------ -void Compile::Fill_buffer() { +//------------------------------init_buffer------------------------------------ +CodeBuffer* Compile::init_buffer(uint* blk_starts) { // Set the initially allocated size int 
code_req = initial_code_capacity; int locs_req = initial_locs_capacity; int stub_req = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity; int const_req = initial_const_capacity; - bool labels_not_set = true; int pad_req = NativeCall::instruction_size; // The extra spacing after the code is necessary on some platforms. // Sometimes we need to patch in a jump after the last instruction, // if the nmethod has been deoptimized. (See 4932387, 4894843.) - uint i; // Compute the byte offset where we can store the deopt pc. if (fixed_slots() != 0) { _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot)); @@ -1078,19 +1271,12 @@ _frame_slots += 8*(16/BytesPerInt); } #endif - assert( _frame_slots >= 0 && _frame_slots < 1000000, "sanity check" ); - - // Create an array of unused labels, one for each basic block - Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1); - - for( i=0; i <= _cfg->_num_blocks; i++ ) { - blk_labels[i].init(); - } + assert(_frame_slots >= 0 && _frame_slots < 1000000, "sanity check"); if (has_mach_constant_base_node()) { // Fill the constant table. - // Note: This must happen before Shorten_branches. - for (i = 0; i < _cfg->_num_blocks; i++) { + // Note: This must happen before shorten_branches. 
+ for (uint i = 0; i < _cfg->_num_blocks; i++) { Block* b = _cfg->_blocks[i]; for (uint j = 0; j < b->_nodes.size(); j++) { @@ -1114,14 +1300,11 @@ // Initialize the space for the BufferBlob used to find and verify // instruction size in MachNode::emit_size() init_scratch_buffer_blob(const_req); - if (failing()) return; // Out of memory - - // If this machine supports different size branch offsets, then pre-compute - // the length of the blocks - if( _matcher->is_short_branch_offset(-1, 0) ) { - Shorten_branches(blk_labels, code_req, locs_req, stub_req); - labels_not_set = false; - } + if (failing()) return NULL; // Out of memory + + // Pre-compute the length of blocks and replace + // long branches with short if machine supports it. + shorten_branches(blk_starts, code_req, locs_req, stub_req); // nmethod and CodeBuffer count stubs & constants as part of method's code. int exception_handler_req = size_exception_handler(); @@ -1151,7 +1334,7 @@ // Have we run out of code space? if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { turn_off_compiler(this); - return; + return NULL; } // Configure the code buffer. cb->initialize_consts_size(const_req); @@ -1162,6 +1345,12 @@ MachNode *_nop_list[Bundle::_nop_count]; Bundle::initialize_nops(_nop_list, this); + return cb; +} + +//------------------------------fill_buffer------------------------------------ +void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { + // Create oopmap set. 
_oop_map_set = new OopMapSet(); @@ -1180,15 +1369,16 @@ int previous_offset = 0; int current_offset = 0; +#ifdef ASSERT int last_call_offset = -1; - + int last_avoid_back_to_back_offset = -1; +#endif // Create an array of unused labels, one for each basic block, if printing is enabled #ifndef PRODUCT int *node_offsets = NULL; - uint node_offset_limit = unique(); - - - if ( print_assembly() ) + uint node_offset_limit = unique(); + + if (print_assembly()) node_offsets = NEW_RESOURCE_ARRAY(int, node_offset_limit); #endif @@ -1199,11 +1389,19 @@ constant_table().emit(*cb); } + // Create an array of labels, one for each basic block + Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1); + for (uint i=0; i <= _cfg->_num_blocks; i++) { + blk_labels[i].init(); + } + // ------------------ // Now fill in the code buffer Node *delay_slot = NULL; - for( i=0; i < _cfg->_num_blocks; i++ ) { + for (uint i=0; i < _cfg->_num_blocks; i++) { + guarantee(blk_starts[i] == (uint)cb->insts_size(),"should not change size"); + Block *b = _cfg->_blocks[i]; Node *head = b->head(); @@ -1211,23 +1409,25 @@ // If this block needs to start aligned (i.e, can be reached other // than by falling-thru from the previous block), then force the // start of a new bundle. 
- if( Pipeline::requires_bundling() && starts_bundle(head) ) + if (Pipeline::requires_bundling() && starts_bundle(head)) cb->flush_bundle(true); +#ifdef ASSERT + if (!b->is_connector()) { + stringStream st; + b->dump_head(&_cfg->_bbs, &st); + MacroAssembler(cb).block_comment(st.as_string()); + } +#endif + // Define the label at the beginning of the basic block - if (labels_not_set) { - MacroAssembler(cb).bind(blk_labels[b->_pre_order]); - } else { - assert(blk_labels[b->_pre_order].loc_pos() == cb->insts_size(), - err_msg("label position does not match code offset: %d != %d", - blk_labels[b->_pre_order].loc_pos(), cb->insts_size())); - } + MacroAssembler(cb).bind(blk_labels[b->_pre_order]); uint last_inst = b->_nodes.size(); // Emit block normally, except for last instruction. // Emit means "dump code bits into code buffer". - for( uint j = 0; j_nodes[j]; @@ -1244,7 +1444,7 @@ // If this starts a new instruction group, then flush the current one // (but allow split bundles) - if( Pipeline::requires_bundling() && starts_bundle(n) ) + if (Pipeline::requires_bundling() && starts_bundle(n)) cb->flush_bundle(false); // The following logic is duplicated in the code ifdeffed for @@ -1253,38 +1453,35 @@ // Special handling for SafePoint/Call Nodes bool is_mcall = false; - if( n->is_Mach() ) { + if (n->is_Mach()) { MachNode *mach = n->as_Mach(); is_mcall = n->is_MachCall(); bool is_sfn = n->is_MachSafePoint(); // If this requires all previous instructions be flushed, then do so - if( is_sfn || is_mcall || mach->alignment_required() != 1) { + if (is_sfn || is_mcall || mach->alignment_required() != 1) { cb->flush_bundle(true); current_offset = cb->insts_size(); } +#ifdef ASSERT + // A padding may be needed again since a previous instruction + // could be moved to delay slot. + // align the instruction if necessary int padding = mach->compute_padding(current_offset); // Make sure safepoint node for polling is distinct from a call's // return by adding a nop if needed. 
- if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset ) { + if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset) { padding = nop_size; } - assert( labels_not_set || padding == 0, "instruction should already be aligned"); - - if(padding > 0) { - assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); - int nops_cnt = padding / nop_size; - MachNode *nop = new (this) MachNopNode(nops_cnt); - b->_nodes.insert(j++, nop); - last_inst++; - _cfg->_bbs.map( nop->_idx, b ); - nop->emit(*cb, _regalloc); - cb->flush_bundle(true); - current_offset = cb->insts_size(); + if (padding == 0 && mach->avoid_back_to_back() && + current_offset == last_avoid_back_to_back_offset) { + // Avoid back to back some instructions. + padding = nop_size; } - + assert(padding == 0, "padding should be added already"); +#endif // Remember the start of the last call in a basic block if (is_mcall) { MachCallNode *mcall = mach->as_MachCall(); @@ -1302,13 +1499,13 @@ } // sfn will be valid whenever mcall is valid now because of inheritance - if( is_sfn || is_mcall ) { + if (is_sfn || is_mcall) { // Handle special safepoint nodes for synchronization - if( !is_mcall ) { + if (!is_mcall) { MachSafePointNode *sfn = mach->as_MachSafePoint(); // !!!!! Stubs only need an oopmap right now, so bail out - if( sfn->jvms()->method() == NULL) { + if (sfn->jvms()->method() == NULL) { // Write the oopmap directly to the code blob??!! 
# ifdef ENABLE_ZAP_DEAD_LOCALS assert( !is_node_getting_a_safepoint(sfn), "logic does not match; false positive"); @@ -1328,14 +1525,14 @@ } // If this is a branch, then fill in the label with the target BB's label - else if ( mach->is_Branch() ) { - - if ( mach->ideal_Opcode() == Op_Jump ) { - for (uint h = 0; h < b->_num_succs; h++ ) { + else if (mach->is_Branch()) { + + if (mach->ideal_Opcode() == Op_Jump) { + for (uint h = 0; h < b->_num_succs; h++) { Block* succs_block = b->_succs[h]; for (uint j = 1; j < succs_block->num_preds(); j++) { Node* jpn = succs_block->pred(j); - if ( jpn->is_JumpProj() && jpn->in(0) == mach ) { + if (jpn->is_JumpProj() && jpn->in(0) == mach) { uint block_num = succs_block->non_connector()->_pre_order; Label *blkLabel = &blk_labels[block_num]; mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel); @@ -1352,7 +1549,7 @@ #ifdef ASSERT // Check that oop-store precedes the card-mark - else if( mach->ideal_Opcode() == Op_StoreCM ) { + else if (mach->ideal_Opcode() == Op_StoreCM) { uint storeCM_idx = j; int count = 0; for (uint prec = mach->req(); prec < mach->len(); prec++) { @@ -1371,7 +1568,7 @@ } #endif - else if( !n->is_Proj() ) { + else if (!n->is_Proj()) { // Remember the beginning of the previous instruction, in case // it's followed by a flag-kill and a null-check. Happens on // Intel all the time, with add-to-memory kind of opcodes. 
@@ -1388,15 +1585,24 @@ // Save the offset for the listing #ifndef PRODUCT - if( node_offsets && n->_idx < node_offset_limit ) + if (node_offsets && n->_idx < node_offset_limit) node_offsets[n->_idx] = cb->insts_size(); #endif // "Normal" instruction case + DEBUG_ONLY( uint instr_offset = cb->insts_size(); ) n->emit(*cb, _regalloc); current_offset = cb->insts_size(); + +#ifdef ASSERT + if (n->size(_regalloc) != (current_offset-instr_offset)) { + n->dump(); + assert(n->size(_regalloc) == (current_offset-instr_offset), "wrong size of mach node"); + } +#endif non_safepoints.observe_instruction(n, current_offset); +#ifdef ASSERT // mcall is last "call" that can be a safepoint // record it so we can see if a poll will directly follow it // in which case we'll need a pad to make the PcDesc sites unique @@ -1408,8 +1614,14 @@ last_call_offset = current_offset; } + if (n->is_Mach() && n->as_Mach()->avoid_back_to_back()) { + // Avoid back to back some instructions. + last_avoid_back_to_back_offset = current_offset; + } +#endif + // See if this instruction has a delay slot - if ( valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) { + if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) { assert(delay_slot != NULL, "expecting delay slot node"); // Back up 1 instruction @@ -1417,15 +1629,15 @@ // Save the offset for the listing #ifndef PRODUCT - if( node_offsets && delay_slot->_idx < node_offset_limit ) + if (node_offsets && delay_slot->_idx < node_offset_limit) node_offsets[delay_slot->_idx] = cb->insts_size(); #endif // Support a SafePoint in the delay slot - if( delay_slot->is_MachSafePoint() ) { + if (delay_slot->is_MachSafePoint()) { MachNode *mach = delay_slot->as_Mach(); // !!!!! 
Stubs only need an oopmap right now, so bail out - if( !mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL ) { + if (!mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL) { // Write the oopmap directly to the code blob??!! # ifdef ENABLE_ZAP_DEAD_LOCALS assert( !is_node_getting_a_safepoint(mach), "logic does not match; false positive"); @@ -1449,21 +1661,15 @@ } } // End for all instructions in block - +#ifdef ASSERT // If the next block is the top of a loop, pad this block out to align // the loop top a little. Helps prevent pipe stalls at loop back branches. - if( i<_cfg->_num_blocks-1 ) { + if (i < _cfg->_num_blocks-1) { Block *nb = _cfg->_blocks[i+1]; uint padding = nb->alignment_padding(current_offset); - if( padding > 0 ) { - MachNode *nop = new (this) MachNopNode(padding / nop_size); - b->_nodes.insert( b->_nodes.size(), nop ); - _cfg->_bbs.map( nop->_idx, b ); - nop->emit(*cb, _regalloc); - current_offset = cb->insts_size(); - } + assert(padding == 0, "alignment should be added already"); } - +#endif } // End of for all blocks non_safepoints.flush_at_end(); @@ -1743,11 +1949,6 @@ // Create a data structure for all the scheduling information Scheduling scheduling(Thread::current()->resource_area(), *this); - // Initialize the space for the BufferBlob used to find and verify - // instruction size in MachNode::emit_size() - init_scratch_buffer_blob(MAX_const_size); - if (failing()) return; // Out of memory - // Walk backwards over each basic block, computing the needed alignment // Walk over all the basic blocks scheduling.DoScheduling(); @@ -2346,6 +2547,12 @@ // have their delay slots filled in the template expansions, so we don't // bother scheduling them. Node *last = bb->_nodes[_bb_end]; + // Ignore trailing NOPs. 
+ while (_bb_end > 0 && last->is_Mach() && + last->as_Mach()->ideal_Opcode() == Op_Con) { + last = bb->_nodes[--_bb_end]; + } + assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, ""); if( last->is_Catch() || // Exclude unreachable path case when Halt node is in a separate block. (_bb_end > 1 && last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) { @@ -2680,6 +2887,23 @@ anti_do_def( b, n, _regalloc->get_reg_second(n), is_def ); } + // Kill projections on a branch should appear to occur on the + // branch, not afterwards, so grab the masks from the projections + // and process them. + if (n->is_Branch()) { + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + Node* use = n->fast_out(i); + if (use->is_Proj()) { + RegMask rm = use->out_RegMask();// Make local copy + while( rm.is_NotEmpty() ) { + OptoReg::Name kill = rm.find_first_elem(); + rm.Remove(kill); + anti_do_def( b, n, kill, false ); + } + } + } + } + // Check each register used by this instruction for a following DEF/KILL // that must occur afterward and requires an anti-dependence edge. for( uint j=0; jreq(); j++ ) {