# HG changeset patch # User iveresov # Date 1397196913 25200 # Node ID 17b2fbdb66379a36b828c2c20e9fa9327f530eb9 # Parent cb1b723cbca8f953bfdd2c78387191d11d0cbaed 8038297: Avoid placing CTI immediately following cbcond instruction on T4 Summary: Insert a nop between cbcond and CTI Reviewed-by: kvn, twisti diff -r cb1b723cbca8 -r 17b2fbdb6637 src/cpu/sparc/vm/assembler_sparc.hpp --- a/src/cpu/sparc/vm/assembler_sparc.hpp Mon Apr 14 20:56:48 2014 -0400 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp Thu Apr 10 23:15:13 2014 -0700 @@ -630,11 +630,20 @@ } protected: + // Insert a nop if the previous is cbcond + void insert_nop_after_cbcond() { + if (UseCBCond && cbcond_before()) { + nop(); + } + } // Delay slot helpers // cti is called when emitting control-transfer instruction, // BEFORE doing the emitting. // Only effective when assertion-checking is enabled. void cti() { + // A cbcond instruction immediately followed by a CTI + // instruction introduces pipeline stalls, we need to avoid that. + no_cbcond_before(); #ifdef CHECK_DELAY assert_not_delayed("cti should not be in delay slot"); #endif @@ -658,7 +667,6 @@ void no_cbcond_before() { assert(offset() == 0 || !cbcond_before(), "cbcond should not follow an other cbcond"); } - public: bool use_cbcond(Label& L) { diff -r cb1b723cbca8 -r 17b2fbdb6637 src/cpu/sparc/vm/assembler_sparc.inline.hpp --- a/src/cpu/sparc/vm/assembler_sparc.inline.hpp Mon Apr 14 20:56:48 2014 -0400 +++ b/src/cpu/sparc/vm/assembler_sparc.inline.hpp Thu Apr 10 23:15:13 2014 -0700 @@ -54,33 +54,33 @@ inline void Assembler::add(Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | rs2(s2) ); } inline void Assembler::add(Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } -inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt ) { v9_only(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(bpr_op2) | wdisp16(intptr_t(d), intptr_t(pc())) | predict(p) | rs1(s1), rt); has_delay_slot(); } -inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, Label& L) { bpr( c, a, p, s1, target(L)); } +inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt ) { v9_only(); insert_nop_after_cbcond(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(bpr_op2) | wdisp16(intptr_t(d), intptr_t(pc())) | predict(p) | rs1(s1), rt); has_delay_slot(); } +inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, Label& L) { insert_nop_after_cbcond(); bpr( c, a, p, s1, target(L)); } -inline void Assembler::fb( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(fb_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt); has_delay_slot(); } -inline void Assembler::fb( Condition c, bool a, Label& L ) { fb(c, a, target(L)); } +inline void Assembler::fb( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep(); insert_nop_after_cbcond(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(fb_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt); has_delay_slot(); } +inline void Assembler::fb( Condition c, bool a, Label& L ) { insert_nop_after_cbcond(); fb(c, a, target(L)); } -inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(fbp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt); has_delay_slot(); } -inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, Label& L ) { fbp(c, a, cc, p, target(L)); } +inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only(); insert_nop_after_cbcond(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(fbp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt); has_delay_slot(); } +inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, Label& L ) { insert_nop_after_cbcond(); fbp(c, a, cc, p, target(L)); } -inline void Assembler::br( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(br_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt); has_delay_slot(); } -inline void Assembler::br( Condition c, bool a, Label& L ) { br(c, a, target(L)); } +inline void Assembler::br( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep(); insert_nop_after_cbcond(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(br_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt); has_delay_slot(); } +inline void Assembler::br( Condition c, bool a, Label& L ) { insert_nop_after_cbcond(); br(c, a, target(L)); } -inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(bp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt); has_delay_slot(); } -inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, Label& L ) { bp(c, a, cc, p, target(L)); } +inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only(); insert_nop_after_cbcond(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(bp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt); has_delay_slot(); } +inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, Label& L ) { insert_nop_after_cbcond(); bp(c, a, cc, p, target(L)); } // compare and branch inline void Assembler::cbcond(Condition c, CC cc, Register s1, Register s2, Label& L) { cti(); no_cbcond_before(); emit_data(op(branch_op) | cond_cbcond(c) | op2(bpr_op2) | branchcc(cc) | wdisp10(intptr_t(target(L)), intptr_t(pc())) | rs1(s1) | rs2(s2)); } inline void Assembler::cbcond(Condition c, CC cc, Register s1, int simm5, Label& L) { cti(); no_cbcond_before(); emit_data(op(branch_op) | cond_cbcond(c) | op2(bpr_op2) | branchcc(cc) | wdisp10(intptr_t(target(L)), intptr_t(pc())) | rs1(s1) | immed(true) | simm(simm5, 5)); } -inline void Assembler::call( address d, relocInfo::relocType rt ) { cti(); emit_data( op(call_op) | wdisp(intptr_t(d), intptr_t(pc()), 30), rt); has_delay_slot(); assert(rt != relocInfo::virtual_call_type, "must use virtual_call_Relocation::spec"); } -inline void Assembler::call( Label& L, relocInfo::relocType rt ) { call( target(L), rt); } +inline void Assembler::call( address d, relocInfo::relocType rt ) { insert_nop_after_cbcond(); cti(); emit_data( op(call_op) | wdisp(intptr_t(d), intptr_t(pc()), 30), rt); has_delay_slot(); assert(rt != relocInfo::virtual_call_type, "must use virtual_call_Relocation::spec"); } +inline void Assembler::call( Label& L, relocInfo::relocType rt ) { insert_nop_after_cbcond(); call( target(L), rt); } inline void Assembler::flush( Register s1, Register s2) { emit_int32( op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2)); } inline void Assembler::flush( Register s1, int simm13a) { emit_data( op(arith_op) | op3(flush_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); } -inline void Assembler::jmpl( Register s1, Register s2, Register d ) { cti(); emit_int32( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | rs2(s2)); has_delay_slot(); } -inline void Assembler::jmpl( Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { cti(); emit_data( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec); has_delay_slot(); } +inline void Assembler::jmpl( Register s1, Register s2, Register d ) { insert_nop_after_cbcond(); cti(); emit_int32( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | rs2(s2)); has_delay_slot(); } +inline void Assembler::jmpl( Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { insert_nop_after_cbcond(); cti(); emit_data( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec); has_delay_slot(); } inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { emit_int32( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | rs2(s2) ); } inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d, RelocationHolder const& rspec) { emit_data( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec); } diff -r cb1b723cbca8 -r 17b2fbdb6637 src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp --- a/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp Mon Apr 14 20:56:48 2014 -0400 +++ b/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp Thu Apr 10 23:15:13 2014 -0700 @@ -233,6 +233,7 @@ } inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) { + insert_nop_after_cbcond(); br(c, a, p, target(L)); } @@ -248,6 +249,7 @@ } inline void MacroAssembler::brx( Condition c, bool a, Predict p, Label& L ) { + insert_nop_after_cbcond(); brx(c, a, p, target(L)); } @@ -269,6 +271,7 @@ } inline void MacroAssembler::fb( Condition c, bool a, Predict p, Label& L ) { + insert_nop_after_cbcond(); fb(c, a, p, target(L)); } @@ -318,6 +321,7 @@ } inline void MacroAssembler::call( Label& L, relocInfo::relocType rt ) { + insert_nop_after_cbcond(); MacroAssembler::call( target(L), rt); } diff -r cb1b723cbca8 -r 17b2fbdb6637 src/cpu/sparc/vm/sparc.ad --- a/src/cpu/sparc/vm/sparc.ad Mon Apr 14 20:56:48 2014 -0400 +++ b/src/cpu/sparc/vm/sparc.ad Thu Apr 10 23:15:13 2014 -0700 @@ -1268,7 +1268,7 @@ void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const { Compile* C = ra_->C; - if( do_polling() && ra_->C->is_method_compilation() ) { + if(do_polling() && ra_->C->is_method_compilation()) { st->print("SETHI #PollAddr,L0\t! Load Polling address\n\t"); #ifdef _LP64 st->print("LDX [L0],G0\t!Poll for Safepointing\n\t"); @@ -1277,8 +1277,12 @@ #endif } - if( do_polling() ) + if(do_polling()) { + if (UseCBCond && !ra_->C->is_method_compilation()) { + st->print("NOP\n\t"); + } st->print("RET\n\t"); + } st->print("RESTORE"); } @@ -1291,15 +1295,20 @@ __ verify_thread(); // If this does safepoint polling, then do it here - if( do_polling() && ra_->C->is_method_compilation() ) { + if(do_polling() && ra_->C->is_method_compilation()) { AddressLiteral polling_page(os::get_polling_page()); __ sethi(polling_page, L0); __ relocate(relocInfo::poll_return_type); - __ ld_ptr( L0, 0, G0 ); + __ ld_ptr(L0, 0, G0); } // If this is a return, then stuff the restore in the delay slot - if( do_polling() ) { + if(do_polling()) { + if (UseCBCond && !ra_->C->is_method_compilation()) { + // Insert extra padding for the case when the epilogue is preceded by + // a cbcond jump, which can't be followed by a CTI instruction + __ nop(); + } __ ret(); __ delayed()->restore(); } else { @@ -3330,7 +3339,18 @@ //----------Instruction Attributes--------------------------------------------- ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute ins_attrib ins_size(32); // Required size attribute (in bits) -ins_attrib ins_avoid_back_to_back(0); // instruction should not be generated back to back + +// avoid_back_to_back attribute is an expression that must return +// one of the following values defined in MachNode: +// AVOID_NONE - instruction can be placed anywhere +// AVOID_BEFORE - instruction cannot be placed after an +// instruction with MachNode::AVOID_AFTER +// AVOID_AFTER - the next instruction cannot be the one +// with MachNode::AVOID_BEFORE +// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at +// the same time +ins_attrib ins_avoid_back_to_back(MachNode::AVOID_NONE); + ins_attrib ins_short_branch(0); // Required flag: is this instruction a // non-matching short branch variant of some // long branch? @@ -6630,6 +6650,7 @@ ins_encode %{ __ encode_heap_oop($src$$Register, $dst$$Register); %} + ins_avoid_back_to_back(Universe::narrow_oop_base() == NULL ? AVOID_NONE : AVOID_BEFORE); ins_pipe(ialu_reg); %} @@ -9199,6 +9220,7 @@ __ ba(*L); __ delayed()->nop(); %} + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(br); %} @@ -9217,7 +9239,7 @@ __ ba_short(*L); %} ins_short_branch(1); - ins_avoid_back_to_back(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); ins_pipe(cbcond_reg_imm); %} @@ -9231,6 +9253,7 @@ format %{ "BP$cmp $icc,$labl" %} // Prim = bits 24-22, Secnd = bits 31-30 ins_encode( enc_bp( labl, cmp, icc ) ); + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(br_cc); %} @@ -9242,6 +9265,7 @@ format %{ "BP$cmp $icc,$labl" %} // Prim = bits 24-22, Secnd = bits 31-30 ins_encode( enc_bp( labl, cmp, icc ) ); + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(br_cc); %} @@ -9260,6 +9284,7 @@ __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, *L); __ delayed()->nop(); %} + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(br_cc); %} @@ -9278,6 +9303,7 @@ __ fbp( (Assembler::Condition)($cmp$$cmpcode), false, (Assembler::CC)($fcc$$reg), predict_taken, *L); __ delayed()->nop(); %} + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(br_fcc); %} @@ -9290,6 +9316,7 @@ format %{ "BP$cmp $icc,$labl\t! Loop end" %} // Prim = bits 24-22, Secnd = bits 31-30 ins_encode( enc_bp( labl, cmp, icc ) ); + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(br_cc); %} @@ -9302,6 +9329,7 @@ format %{ "BP$cmp $icc,$labl\t! Loop end" %} // Prim = bits 24-22, Secnd = bits 31-30 ins_encode( enc_bp( labl, cmp, icc ) ); + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(br_cc); %} @@ -9552,7 +9580,7 @@ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L); %} ins_short_branch(1); - ins_avoid_back_to_back(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); ins_pipe(cbcond_reg_reg); %} @@ -9570,7 +9598,7 @@ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L); %} ins_short_branch(1); - ins_avoid_back_to_back(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); ins_pipe(cbcond_reg_imm); %} @@ -9588,7 +9616,7 @@ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L); %} ins_short_branch(1); - ins_avoid_back_to_back(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); ins_pipe(cbcond_reg_reg); %} @@ -9606,7 +9634,7 @@ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L); %} ins_short_branch(1); - ins_avoid_back_to_back(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); ins_pipe(cbcond_reg_imm); %} @@ -9624,7 +9652,7 @@ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::xcc, $op1$$Register, $op2$$Register, *L); %} ins_short_branch(1); - ins_avoid_back_to_back(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); ins_pipe(cbcond_reg_reg); %} @@ -9642,7 +9670,7 @@ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::xcc, $op1$$Register, $op2$$constant, *L); %} ins_short_branch(1); - ins_avoid_back_to_back(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); ins_pipe(cbcond_reg_imm); %} @@ -9665,7 +9693,7 @@ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::ptr_cc, $op1$$Register, $op2$$Register, *L); %} ins_short_branch(1); - ins_avoid_back_to_back(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); ins_pipe(cbcond_reg_reg); %} @@ -9687,7 +9715,7 @@ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::ptr_cc, $op1$$Register, G0, *L); %} ins_short_branch(1); - ins_avoid_back_to_back(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); ins_pipe(cbcond_reg_reg); %} @@ -9705,7 +9733,7 @@ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L); %} ins_short_branch(1); - ins_avoid_back_to_back(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); ins_pipe(cbcond_reg_reg); %} @@ -9723,7 +9751,7 @@ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, G0, *L); %} ins_short_branch(1); - ins_avoid_back_to_back(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); ins_pipe(cbcond_reg_reg); %} @@ -9742,7 +9770,7 @@ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L); %} ins_short_branch(1); - ins_avoid_back_to_back(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); ins_pipe(cbcond_reg_reg); %} @@ -9760,7 +9788,7 @@ __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L); %} ins_short_branch(1); - ins_avoid_back_to_back(1); + ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER); ins_pipe(cbcond_reg_imm); %} @@ -9777,6 +9805,7 @@ ins_cost(BRANCH_COST); format %{ "BR$cmp $op1,$labl" %} ins_encode( enc_bpr( labl, cmp, op1 ) ); + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(br_reg); %} @@ -9789,6 +9818,7 @@ ins_cost(BRANCH_COST); format %{ "BR$cmp $op1,$labl" %} ins_encode( enc_bpr( labl, cmp, op1 ) ); + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(br_reg); %} @@ -9801,6 +9831,7 @@ ins_cost(BRANCH_COST); format %{ "BR$cmp $op1,$labl" %} ins_encode( enc_bpr( labl, cmp, op1 ) ); + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(br_reg); %} @@ -9841,6 +9872,7 @@ __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L); __ delayed()->nop(); %} + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(br_cc); %} @@ -9968,6 +10000,7 @@ ins_cost(CALL_COST); format %{ "CALL,static ; NOP ==> " %} ins_encode( Java_Static_Call( meth ), call_epilog ); + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(simple_call); %} @@ -10004,6 +10037,7 @@ format %{ "CALL,runtime" %} ins_encode( Java_To_Runtime( meth ), call_epilog, adjust_long_from_native_call ); + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(simple_call); %} @@ -10016,6 +10050,7 @@ ins_encode( Java_To_Runtime( meth ), call_epilog, adjust_long_from_native_call ); + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(simple_call); %} @@ -10028,6 +10063,7 @@ ins_encode( Java_To_Runtime( meth ), call_epilog, adjust_long_from_native_call ); + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(simple_call); %} @@ -10041,6 +10077,7 @@ ins_cost(CALL_COST); format %{ "Jmp $jump_target ; NOP \t! $method_oop holds method oop" %} ins_encode(form_jmpl(jump_target)); + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(tail_call); %} @@ -10072,6 +10109,7 @@ // opcode(Assembler::jmpl_op3, Assembler::arith_op); // The hack duplicates the exception oop into G3, so that CreateEx can use it there. // ins_encode( form3_rs1_simm13_rd( jump_target, 0x00, R_G0 ), move_return_pc_to_o1() ); + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(tail_call); %} @@ -10102,6 +10140,7 @@ // use the following format syntax format %{ "Jmp rethrow_stub" %} ins_encode(enc_rethrow); + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(tail_call); %} @@ -10130,6 +10169,7 @@ ins_cost(DEFAULT_COST*10); format %{ "CALL PartialSubtypeCheck\n\tNOP" %} ins_encode( enc_PartialSubtypeCheck() ); + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(partial_subtype_check_pipe); %} @@ -10139,6 +10179,7 @@ ins_cost(DEFAULT_COST*10); format %{ "CALL PartialSubtypeCheck\n\tNOP\t# (sets condition codes)" %} ins_encode( enc_PartialSubtypeCheck() ); + ins_avoid_back_to_back(AVOID_BEFORE); ins_pipe(partial_subtype_check_pipe); %} diff -r cb1b723cbca8 -r 17b2fbdb6637 src/share/vm/adlc/output_h.cpp --- a/src/share/vm/adlc/output_h.cpp Mon Apr 14 20:56:48 2014 -0400 +++ b/src/share/vm/adlc/output_h.cpp Thu Apr 10 23:15:13 2014 -0700 @@ -1613,21 +1613,20 @@ // Each instruction attribute results in a virtual call of same name. // The ins_cost is not handled here. Attribute *attr = instr->_attribs; - bool avoid_back_to_back = false; + Attribute *avoid_back_to_back_attr = NULL; while (attr != NULL) { - if (strcmp (attr->_ident, "ins_cost") != 0 && + if (strcmp (attr->_ident, "ins_is_TrapBasedCheckNode") == 0) { + fprintf(fp, " virtual bool is_TrapBasedCheckNode() const { return %s; }\n", attr->_val); + } else if (strcmp (attr->_ident, "ins_cost") != 0 && strncmp(attr->_ident, "ins_field_", 10) != 0 && // Must match function in node.hpp: return type bool, no prefix "ins_". strcmp (attr->_ident, "ins_is_TrapBasedCheckNode") != 0 && strcmp (attr->_ident, "ins_short_branch") != 0) { fprintf(fp, " virtual int %s() const { return %s; }\n", attr->_ident, attr->_val); } - // Check value for ins_avoid_back_to_back, and if it is true (1), set the flag - if (!strcmp(attr->_ident, "ins_avoid_back_to_back") != 0 && attr->int_val(*this) != 0) - avoid_back_to_back = true; - if (strcmp (attr->_ident, "ins_is_TrapBasedCheckNode") == 0) - fprintf(fp, " virtual bool is_TrapBasedCheckNode() const { return %s; }\n", attr->_val); - + if (strcmp(attr->_ident, "ins_avoid_back_to_back") == 0) { + avoid_back_to_back_attr = attr; + } attr = (Attribute *)attr->_next; } @@ -1799,11 +1798,11 @@ } // flag: if this instruction should not be generated back to back. - if ( avoid_back_to_back ) { - if ( node_flags_set ) { - fprintf(fp," | Flag_avoid_back_to_back"); + if (avoid_back_to_back_attr != NULL) { + if (node_flags_set) { + fprintf(fp," | (%s)", avoid_back_to_back_attr->_val); } else { - fprintf(fp,"init_flags(Flag_avoid_back_to_back"); + fprintf(fp,"init_flags((%s)", avoid_back_to_back_attr->_val); node_flags_set = true; } } diff -r cb1b723cbca8 -r 17b2fbdb6637 src/share/vm/opto/machnode.hpp --- a/src/share/vm/opto/machnode.hpp Mon Apr 14 20:56:48 2014 -0400 +++ b/src/share/vm/opto/machnode.hpp Thu Apr 10 23:15:13 2014 -0700 @@ -210,7 +210,14 @@ bool may_be_short_branch() const { return (flags() & Flag_may_be_short_branch) != 0; } // Avoid back to back some instructions on some CPUs. - bool avoid_back_to_back() const { return (flags() & Flag_avoid_back_to_back) != 0; } + enum AvoidBackToBackFlag { AVOID_NONE = 0, + AVOID_BEFORE = Flag_avoid_back_to_back_before, + AVOID_AFTER = Flag_avoid_back_to_back_after, + AVOID_BEFORE_AND_AFTER = AVOID_BEFORE | AVOID_AFTER }; + + bool avoid_back_to_back(AvoidBackToBackFlag flag_value) const { + return (flags() & flag_value) == flag_value; + } // instruction implemented with a call bool has_call() const { return (flags() & Flag_has_call) != 0; } diff -r cb1b723cbca8 -r 17b2fbdb6637 src/share/vm/opto/node.hpp --- a/src/share/vm/opto/node.hpp Mon Apr 14 20:56:48 2014 -0400 +++ b/src/share/vm/opto/node.hpp Thu Apr 10 23:15:13 2014 -0700 @@ -645,17 +645,18 @@ // Flags are sorted by usage frequency. enum NodeFlags { - Flag_is_Copy = 0x01, // should be first bit to avoid shift - Flag_rematerialize = Flag_is_Copy << 1, + Flag_is_Copy = 0x01, // should be first bit to avoid shift + Flag_rematerialize = Flag_is_Copy << 1, Flag_needs_anti_dependence_check = Flag_rematerialize << 1, - Flag_is_macro = Flag_needs_anti_dependence_check << 1, - Flag_is_Con = Flag_is_macro << 1, - Flag_is_cisc_alternate = Flag_is_Con << 1, - Flag_is_dead_loop_safe = Flag_is_cisc_alternate << 1, - Flag_may_be_short_branch = Flag_is_dead_loop_safe << 1, - Flag_avoid_back_to_back = Flag_may_be_short_branch << 1, - Flag_has_call = Flag_avoid_back_to_back << 1, - Flag_is_expensive = Flag_has_call << 1, + Flag_is_macro = Flag_needs_anti_dependence_check << 1, + Flag_is_Con = Flag_is_macro << 1, + Flag_is_cisc_alternate = Flag_is_Con << 1, + Flag_is_dead_loop_safe = Flag_is_cisc_alternate << 1, + Flag_may_be_short_branch = Flag_is_dead_loop_safe << 1, + Flag_avoid_back_to_back_before = Flag_may_be_short_branch << 1, + Flag_avoid_back_to_back_after = Flag_avoid_back_to_back_before << 1, + Flag_has_call = Flag_avoid_back_to_back_after << 1, + Flag_is_expensive = Flag_has_call << 1, _max_flags = (Flag_is_expensive << 1) - 1 // allow flags combination }; diff -r cb1b723cbca8 -r 17b2fbdb6637 src/share/vm/opto/output.cpp --- a/src/share/vm/opto/output.cpp Mon Apr 14 20:56:48 2014 -0400 +++ b/src/share/vm/opto/output.cpp Thu Apr 10 23:15:13 2014 -0700 @@ -411,7 +411,7 @@ blk_size += nop_size; } } - if (mach->avoid_back_to_back()) { + if (mach->avoid_back_to_back(MachNode::AVOID_BEFORE)) { // Nop is inserted between "avoid back to back" instructions. // ScheduleAndBundle() can rearrange nodes in a block, // check for all offsets inside this block. @@ -439,7 +439,7 @@ last_call_adr = blk_starts[i]+blk_size; } // Remember end of avoid_back_to_back offset - if (nj->is_Mach() && nj->as_Mach()->avoid_back_to_back()) { + if (nj->is_Mach() && nj->as_Mach()->avoid_back_to_back(MachNode::AVOID_AFTER)) { last_avoid_back_to_back_adr = blk_starts[i]+blk_size; } } @@ -525,11 +525,11 @@ int new_size = replacement->size(_regalloc); int diff = br_size - new_size; assert(diff >= (int)nop_size, "short_branch size should be smaller"); - // Conservatively take into accound padding between + // Conservatively take into account padding between // avoid_back_to_back branches. Previous branch could be // converted into avoid_back_to_back branch during next // rounds. - if (needs_padding && replacement->avoid_back_to_back()) { + if (needs_padding && replacement->avoid_back_to_back(MachNode::AVOID_BEFORE)) { jmp_offset[i] += nop_size; diff -= nop_size; } @@ -548,7 +548,7 @@ } } // (mach->may_be_short_branch()) if (mach != NULL && (mach->may_be_short_branch() || - mach->avoid_back_to_back())) { + mach->avoid_back_to_back(MachNode::AVOID_AFTER))) { last_may_be_short_branch_adr = blk_starts[i] + jmp_offset[i] + jmp_size[i]; } blk_starts[i+1] -= adjust_block_start; @@ -1313,7 +1313,7 @@ if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset) { padding = nop_size; } - if (padding == 0 && mach->avoid_back_to_back() && + if (padding == 0 && mach->avoid_back_to_back(MachNode::AVOID_BEFORE) && current_offset == last_avoid_back_to_back_offset) { // Avoid back to back some instructions. padding = nop_size; @@ -1407,7 +1407,7 @@ int new_size = replacement->size(_regalloc); assert((br_size - new_size) >= (int)nop_size, "short_branch size should be smaller"); // Insert padding between avoid_back_to_back branches. - if (needs_padding && replacement->avoid_back_to_back()) { + if (needs_padding && replacement->avoid_back_to_back(MachNode::AVOID_BEFORE)) { MachNode *nop = new (this) MachNopNode(); block->insert_node(nop, j++); _cfg->map_node_to_block(nop, block); @@ -1515,7 +1515,7 @@ last_call_offset = current_offset; } - if (n->is_Mach() && n->as_Mach()->avoid_back_to_back()) { + if (n->is_Mach() && n->as_Mach()->avoid_back_to_back(MachNode::AVOID_AFTER)) { // Avoid back to back some instructions. last_avoid_back_to_back_offset = current_offset; }