# HG changeset patch # User kvn # Date 1311272707 25200 # Node ID 3d42f82cd81143e1bbb2882611bad85df666a725 # Parent 6a991dcb52bb7148b79ce6d5388cbd9ddb3b555c 7063628: Use cbcond on T4 Summary: Add new short branch instruction to Hotspot sparc assembler. Reviewed-by: never, twisti, jrose diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/assembler_sparc.cpp --- a/src/cpu/sparc/vm/assembler_sparc.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -100,12 +100,19 @@ case call_op: s = "call"; break; case branch_op: switch (inv_op2(inst)) { - case bpr_op2: s = "bpr"; break; case fb_op2: s = "fb"; break; case fbp_op2: s = "fbp"; break; case br_op2: s = "br"; break; case bp_op2: s = "bp"; break; case cb_op2: s = "cb"; break; + case bpr_op2: { + if (is_cbcond(inst)) { + s = is_cxb(inst) ? "cxb" : "cwb"; + } else { + s = "bpr"; + } + break; + } default: s = "????"; break; } } @@ -127,12 +134,21 @@ case call_op: m = wdisp(word_aligned_ones, 0, 30); v = wdisp(dest_pos, inst_pos, 30); break; case branch_op: switch (inv_op2(inst)) { - case bpr_op2: m = wdisp16(word_aligned_ones, 0); v = wdisp16(dest_pos, inst_pos); break; case fbp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break; case bp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break; case fb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; case br_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; case cb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; + case bpr_op2: { + if (is_cbcond(inst)) { + m = wdisp10(word_aligned_ones, 0); + v = wdisp10(dest_pos, inst_pos); + } else { + m = wdisp16(word_aligned_ones, 0); + v = wdisp16(dest_pos, inst_pos); + } + break; + } default: ShouldNotReachHere(); } } @@ -149,12 +165,19 @@ case call_op: r = inv_wdisp(inst, pos, 30); break; case branch_op: switch (inv_op2(inst)) { - case bpr_op2: r = inv_wdisp16(inst, pos); break; case fbp_op2: r = inv_wdisp( inst, pos, 19); break; case bp_op2: r = inv_wdisp( inst, pos, 19); break; case fb_op2: r = inv_wdisp( inst, pos, 22); break; case br_op2: r = inv_wdisp( inst, pos, 22); break; case cb_op2: r = inv_wdisp( inst, pos, 22); break; + case bpr_op2: { + if (is_cbcond(inst)) { + r = inv_wdisp10(inst, pos); + } else { + r = inv_wdisp16(inst, pos); + } + break; + } default: ShouldNotReachHere(); } } @@ -968,13 +991,7 @@ Label PcOk; save_frame(0); // to avoid clobbering O0 ld_ptr(pc_addr, L0); - tst(L0); -#ifdef _LP64 - brx(Assembler::zero, false, Assembler::pt, PcOk); -#else - br(Assembler::zero, false, Assembler::pt, PcOk); -#endif // _LP64 - delayed() -> nop(); + br_null_short(L0, Assembler::pt, PcOk); stop("last_Java_pc not zeroed before leaving Java"); bind(PcOk); @@ -1003,7 +1020,7 @@ Label StackOk; andcc(last_java_sp, 0x01, G0); br(Assembler::notZero, false, Assembler::pt, StackOk); - delayed() -> nop(); + delayed()->nop(); stop("Stack Not Biased in set_last_Java_frame"); bind(StackOk); #endif // ASSERT @@ -1099,8 +1116,7 @@ Address exception_addr(G2_thread, Thread::pending_exception_offset()); ld_ptr(exception_addr, scratch_reg); - br_null(scratch_reg,false,pt,L); - delayed()->nop(); + br_null_short(scratch_reg, pt, L); // we use O7 linkage so that forward_exception_entry has the issuing PC call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); delayed()->nop(); @@ -1874,14 +1890,11 @@ // assert((obj & 
oop_mask) == oop_bits); and3(O0_obj, O2_mask, O4_temp); - cmp(O4_temp, O3_bits); - brx(notEqual, false, pn, null_or_fail); - delayed()->nop(); + cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, null_or_fail); if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) { // the null_or_fail case is useless; must test for null separately - br_null(O0_obj, false, pn, succeed); - delayed()->nop(); + br_null_short(O0_obj, pn, succeed); } // Check the klassOop of this object for being in the right area of memory. @@ -1893,9 +1906,7 @@ if( Universe::verify_klass_bits() != Universe::verify_oop_bits() ) set(Universe::verify_klass_bits(), O3_bits); and3(O0_obj, O2_mask, O4_temp); - cmp(O4_temp, O3_bits); - brx(notEqual, false, pn, fail); - delayed()->nop(); + cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, fail); // Check the klass's klass load_klass(O0_obj, O0_obj); and3(O0_obj, O2_mask, O4_temp); @@ -2122,13 +2133,12 @@ return Assembler::rc_z; } -// compares register with zero and branches. NOT FOR USE WITH 64-bit POINTERS -void MacroAssembler::br_zero( Condition c, bool a, Predict p, Register s1, Label& L) { +// compares (32 bit) register with zero and branches. NOT FOR USE WITH 64-bit POINTERS +void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) { tst(s1); br (c, a, p, L); } - // Compares a pointer register with zero and branches on null. // Does a test & branch on 32-bit systems and a register-branch on 64-bit. void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L ) { @@ -2154,6 +2164,7 @@ void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, Register s1, address d, relocInfo::relocType rt ) { + assert_not_delayed(); if (VM_Version::v9_instructions_work()) { bpr(rc, a, p, s1, d, rt); } else { @@ -2164,6 +2175,7 @@ void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, Register s1, Label& L ) { + assert_not_delayed(); if (VM_Version::v9_instructions_work()) { bpr(rc, a, p, s1, L); } else { @@ -2172,6 +2184,91 @@ } } +// Compare registers and branch with nop in delay slot or cbcond without delay slot. + +// Compare integer (32 bit) values (icc only). +void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c, + Predict p, Label& L) { + assert_not_delayed(); + if (use_cbcond(L)) { + Assembler::cbcond(c, icc, s1, s2, L); + } else { + cmp(s1, s2); + br(c, false, p, L); + delayed()->nop(); + } +} + +// Compare integer (32 bit) values (icc only). +void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c, + Predict p, Label& L) { + assert_not_delayed(); + if (is_simm(simm13a,5) && use_cbcond(L)) { + Assembler::cbcond(c, icc, s1, simm13a, L); + } else { + cmp(s1, simm13a); + br(c, false, p, L); + delayed()->nop(); + } +} + +// Branch that tests xcc in LP64 and icc in !LP64 +void MacroAssembler::cmp_and_brx_short(Register s1, Register s2, Condition c, + Predict p, Label& L) { + assert_not_delayed(); + if (use_cbcond(L)) { + Assembler::cbcond(c, ptr_cc, s1, s2, L); + } else { + cmp(s1, s2); + brx(c, false, p, L); + delayed()->nop(); + } +} + +// Branch that tests xcc in LP64 and icc in !LP64 +void MacroAssembler::cmp_and_brx_short(Register s1, int simm13a, Condition c, + Predict p, Label& L) { + assert_not_delayed(); + if (is_simm(simm13a,5) && use_cbcond(L)) { + Assembler::cbcond(c, ptr_cc, s1, simm13a, L); + } else { + cmp(s1, simm13a); + brx(c, false, p, L); + delayed()->nop(); + } +} + +// Short branch version for compares a pointer with zero. 
+ +void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) { + assert_not_delayed(); + if (use_cbcond(L)) { + Assembler::cbcond(zero, ptr_cc, s1, 0, L); + return; + } + br_null(s1, false, p, L); + delayed()->nop(); +} + +void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) { + assert_not_delayed(); + if (use_cbcond(L)) { + Assembler::cbcond(notZero, ptr_cc, s1, 0, L); + return; + } + br_notnull(s1, false, p, L); + delayed()->nop(); +} + +// Unconditional short branch +void MacroAssembler::ba_short(Label& L) { + if (use_cbcond(L)) { + Assembler::cbcond(equal, icc, G0, G0, L); + return; + } + br(always, false, pt, L); + delayed()->nop(); +} // instruction sequences factored across compiler & interpreter @@ -2197,11 +2294,9 @@ // since that triplet is reached only after finding the high halves differ. if (VM_Version::v9_instructions_work()) { - - mov ( -1, Rresult); - ba( false, done ); delayed()-> movcc(greater, false, icc, 1, Rresult); - } - else { + mov(-1, Rresult); + ba(done); delayed()-> movcc(greater, false, icc, 1, Rresult); + } else { br(less, true, pt, done); delayed()-> set(-1, Rresult); br(greater, true, pt, done); delayed()-> set( 1, Rresult); } @@ -2212,9 +2307,8 @@ mov( -1, Rresult); movcc(equal, false, icc, 0, Rresult); movcc(greaterUnsigned, false, icc, 1, Rresult); - } - else { - set(-1, Rresult); + } else { + set(-1, Rresult); br(equal, true, pt, done); delayed()->set( 0, Rresult); br(greaterUnsigned, true, pt, done); delayed()->set( 1, Rresult); } @@ -2250,11 +2344,10 @@ // This code can be optimized to use the 64 bit shifts in V9. // Here we use the 32 bit shifts. - and3( Rcount, 0x3f, Rcount); // take least significant 6 bits - subcc(Rcount, 31, Ralt_count); + and3( Rcount, 0x3f, Rcount); // take least significant 6 bits + subcc(Rcount, 31, Ralt_count); br(greater, true, pn, big_shift); - delayed()-> - dec(Ralt_count); + delayed()->dec(Ralt_count); // shift < 32 bits, Ralt_count = Rcount-31 @@ -2263,28 +2356,27 @@ // more to take care of the special (rare) case where count is zero // (shifting by 32 would not work). - neg( Ralt_count ); + neg(Ralt_count); // The order of the next two instructions is critical in the case where // Rin and Rout are the same and should not be reversed. - srl( Rin_low, Ralt_count, Rxfer_bits ); // shift right by 31-count + srl(Rin_low, Ralt_count, Rxfer_bits); // shift right by 31-count if (Rcount != Rout_low) { - sll( Rin_low, Rcount, Rout_low ); // low half + sll(Rin_low, Rcount, Rout_low); // low half } - sll( Rin_high, Rcount, Rout_high ); + sll(Rin_high, Rcount, Rout_high); if (Rcount == Rout_low) { - sll( Rin_low, Rcount, Rout_low ); // low half + sll(Rin_low, Rcount, Rout_low); // low half } - srl( Rxfer_bits, 1, Rxfer_bits ); // shift right by one more - ba (false, done); - delayed()-> - or3( Rout_high, Rxfer_bits, Rout_high); // new hi value: or in shifted old hi part and xfer from low + srl(Rxfer_bits, 1, Rxfer_bits ); // shift right by one more + ba(done); + delayed()->or3(Rout_high, Rxfer_bits, Rout_high); // new hi value: or in shifted old hi part and xfer from low // shift >= 32 bits, Ralt_count = Rcount-32 bind(big_shift); - sll( Rin_low, Ralt_count, Rout_high ); - clr( Rout_low ); + sll(Rin_low, Ralt_count, Rout_high ); + clr(Rout_low); bind(done); } @@ -2313,8 +2405,8 @@ // This code can be optimized to use the 64 bit shifts in V9. // Here we use the 32 bit shifts. 
- and3( Rcount, 0x3f, Rcount); // take least significant 6 bits - subcc(Rcount, 31, Ralt_count); + and3( Rcount, 0x3f, Rcount); // take least significant 6 bits + subcc(Rcount, 31, Ralt_count); br(greater, true, pn, big_shift); delayed()->dec(Ralt_count); @@ -2325,29 +2417,28 @@ // more to take care of the special (rare) case where count is zero // (shifting by 32 would not work). - neg( Ralt_count ); + neg(Ralt_count); if (Rcount != Rout_low) { - srl( Rin_low, Rcount, Rout_low ); + srl(Rin_low, Rcount, Rout_low); } // The order of the next two instructions is critical in the case where // Rin and Rout are the same and should not be reversed. - sll( Rin_high, Ralt_count, Rxfer_bits ); // shift left by 31-count - sra( Rin_high, Rcount, Rout_high ); // high half - sll( Rxfer_bits, 1, Rxfer_bits ); // shift left by one more + sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count + sra(Rin_high, Rcount, Rout_high ); // high half + sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more if (Rcount == Rout_low) { - srl( Rin_low, Rcount, Rout_low ); + srl(Rin_low, Rcount, Rout_low); } - ba (false, done); - delayed()-> - or3( Rout_low, Rxfer_bits, Rout_low ); // new low value: or shifted old low part and xfer from high + ba(done); + delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high // shift >= 32 bits, Ralt_count = Rcount-32 bind(big_shift); - sra( Rin_high, Ralt_count, Rout_low ); - sra( Rin_high, 31, Rout_high ); // sign into hi + sra(Rin_high, Ralt_count, Rout_low); + sra(Rin_high, 31, Rout_high); // sign into hi bind( done ); } @@ -2377,8 +2468,8 @@ // This code can be optimized to use the 64 bit shifts in V9. // Here we use the 32 bit shifts. - and3( Rcount, 0x3f, Rcount); // take least significant 6 bits - subcc(Rcount, 31, Ralt_count); + and3( Rcount, 0x3f, Rcount); // take least significant 6 bits + subcc(Rcount, 31, Ralt_count); br(greater, true, pn, big_shift); delayed()->dec(Ralt_count); @@ -2389,29 +2480,28 @@ // more to take care of the special (rare) case where count is zero // (shifting by 32 would not work). - neg( Ralt_count ); + neg(Ralt_count); if (Rcount != Rout_low) { - srl( Rin_low, Rcount, Rout_low ); + srl(Rin_low, Rcount, Rout_low); } // The order of the next two instructions is critical in the case where // Rin and Rout are the same and should not be reversed. 
- sll( Rin_high, Ralt_count, Rxfer_bits ); // shift left by 31-count - srl( Rin_high, Rcount, Rout_high ); // high half - sll( Rxfer_bits, 1, Rxfer_bits ); // shift left by one more + sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count + srl(Rin_high, Rcount, Rout_high ); // high half + sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more if (Rcount == Rout_low) { - srl( Rin_low, Rcount, Rout_low ); + srl(Rin_low, Rcount, Rout_low); } - ba (false, done); - delayed()-> - or3( Rout_low, Rxfer_bits, Rout_low ); // new low value: or shifted old low part and xfer from high + ba(done); + delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high // shift >= 32 bits, Ralt_count = Rcount-32 bind(big_shift); - srl( Rin_high, Ralt_count, Rout_low ); - clr( Rout_high ); + srl(Rin_high, Ralt_count, Rout_low); + clr(Rout_high); bind( done ); } @@ -2419,7 +2509,7 @@ #ifdef _LP64 void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) { cmp(Ra, Rb); - mov( -1, Rresult); + mov(-1, Rresult); movcc(equal, false, xcc, 0, Rresult); movcc(greater, false, xcc, 1, Rresult); } @@ -2459,14 +2549,14 @@ if (VM_Version::v9_instructions_work()) { - mov( -1, Rresult ); - movcc( eq, true, fcc0, 0, Rresult ); - movcc( gt, true, fcc0, 1, Rresult ); + mov(-1, Rresult); + movcc(eq, true, fcc0, 0, Rresult); + movcc(gt, true, fcc0, 1, Rresult); } else { Label done; - set( -1, Rresult ); + set( -1, Rresult ); //fb(lt, true, pn, done); delayed()->set( -1, Rresult ); fb( eq, true, pn, done); delayed()->set( 0, Rresult ); fb( gt, true, pn, done); delayed()->set( 1, Rresult ); @@ -2668,9 +2758,7 @@ set(StubRoutines::Sparc::locked, lock_reg); bind(retry_get_lock); - cmp(yield_reg, V8AtomicOperationUnderLockSpinCount); - br(Assembler::less, false, Assembler::pt, dont_yield); - delayed()->nop(); + cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dont_yield); if(use_call_vm) { Untested("Need to verify global reg consistancy"); @@ -2700,9 +2788,7 @@ // yes, got lock. do we have the same top? ld(top_ptr_reg_after_save, 0, value_reg); - cmp(value_reg, top_reg_after_save); - br(Assembler::notEqual, false, Assembler::pn, not_same); - delayed()->nop(); + cmp_and_br_short(value_reg, top_reg_after_save, Assembler::notEqual, Assembler::pn, not_same); // yes, same top. st(ptr_reg_after_save, top_ptr_reg_after_save, 0); @@ -2952,8 +3038,7 @@ // on success: restore(); - ba(false, L_success); - delayed()->nop(); + ba_short(L_success); // on failure: bind(L_pop_to_failure); @@ -2969,8 +3054,7 @@ Label* L_success, Label* L_failure, Label* L_slow_path, - RegisterOrConstant super_check_offset, - Register instanceof_hack) { + RegisterOrConstant super_check_offset) { int sc_offset = (klassOopDesc::header_size() * HeapWordSize + Klass::secondary_super_cache_offset_in_bytes()); int sco_offset = (klassOopDesc::header_size() * HeapWordSize + @@ -2993,29 +3077,10 @@ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } - assert(label_nulls <= 1 || instanceof_hack != noreg || + assert(label_nulls <= 1 || (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path), "at most one NULL in the batch, usually"); - // Support for the instanceof hack, which uses delay slots to - // set a destination register to zero or one. 
- bool do_bool_sets = (instanceof_hack != noreg); -#define BOOL_SET(bool_value) \ - if (do_bool_sets && bool_value >= 0) \ - set(bool_value, instanceof_hack) -#define DELAYED_BOOL_SET(bool_value) \ - if (do_bool_sets && bool_value >= 0) \ - delayed()->set(bool_value, instanceof_hack); \ - else delayed()->nop() - // Hacked ba(), which may only be used just before L_fallthrough. -#define FINAL_JUMP(label, bool_value) \ - if (&(label) == &L_fallthrough) { \ - BOOL_SET(bool_value); \ - } else { \ - ba((do_bool_sets && bool_value >= 0), label); \ - DELAYED_BOOL_SET(bool_value); \ - } - // If the pointers are equal, we are done (e.g., String[] elements). // This self-check enables sharing of secondary supertype arrays among // non-primary types such as array-of-interface. Otherwise, each such @@ -3024,8 +3089,8 @@ // type checks are in fact trivially successful in this manner, // so we get a nicely predicted branch right at the start of the check. cmp(super_klass, sub_klass); - brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success); - DELAYED_BOOL_SET(1); + brx(Assembler::equal, false, Assembler::pn, *L_success); + delayed()->nop(); // Check the supertype display: if (must_load_sco) { @@ -3049,50 +3114,49 @@ // So if it was a primary super, we can just fail immediately. // Otherwise, it's the slow path for us (no success at this point). + // Hacked ba(), which may only be used just before L_fallthrough. +#define FINAL_JUMP(label) \ + if (&(label) != &L_fallthrough) { \ + ba(label); delayed()->nop(); \ + } + if (super_check_offset.is_register()) { - brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success); - delayed(); if (do_bool_sets) BOOL_SET(1); - // if !do_bool_sets, sneak the next cmp into the delay slot: - cmp(super_check_offset.as_register(), sc_offset); + brx(Assembler::equal, false, Assembler::pn, *L_success); + delayed()->cmp(super_check_offset.as_register(), sc_offset); if (L_failure == &L_fallthrough) { - brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_slow_path); + brx(Assembler::equal, false, Assembler::pt, *L_slow_path); delayed()->nop(); - BOOL_SET(0); // fallthrough on failure } else { - brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure); - DELAYED_BOOL_SET(0); - FINAL_JUMP(*L_slow_path, -1); // -1 => vanilla delay slot + brx(Assembler::notEqual, false, Assembler::pn, *L_failure); + delayed()->nop(); + FINAL_JUMP(*L_slow_path); } } else if (super_check_offset.as_constant() == sc_offset) { // Need a slow path; fast failure is impossible. if (L_slow_path == &L_fallthrough) { - brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success); - DELAYED_BOOL_SET(1); + brx(Assembler::equal, false, Assembler::pt, *L_success); + delayed()->nop(); } else { brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path); delayed()->nop(); - FINAL_JUMP(*L_success, 1); + FINAL_JUMP(*L_success); } } else { // No slow path; it's a fast decision. 
if (L_failure == &L_fallthrough) { - brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success); - DELAYED_BOOL_SET(1); - BOOL_SET(0); + brx(Assembler::equal, false, Assembler::pt, *L_success); + delayed()->nop(); } else { - brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure); - DELAYED_BOOL_SET(0); - FINAL_JUMP(*L_success, 1); + brx(Assembler::notEqual, false, Assembler::pn, *L_failure); + delayed()->nop(); + FINAL_JUMP(*L_success); } } bind(L_fallthrough); -#undef final_jump -#undef bool_set -#undef DELAYED_BOOL_SET -#undef final_jump +#undef FINAL_JUMP } @@ -3185,7 +3249,7 @@ st_ptr(super_klass, sub_klass, sc_offset); if (L_success != &L_fallthrough) { - ba(false, *L_success); + ba(*L_success); delayed()->nop(); } @@ -3200,9 +3264,7 @@ // compare method type against that of the receiver RegisterOrConstant mhtype_offset = delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg); load_heap_oop(mh_reg, mhtype_offset, temp_reg); - cmp(temp_reg, mtype_reg); - br(Assembler::notEqual, false, Assembler::pn, wrong_method_type); - delayed()->nop(); + cmp_and_brx_short(temp_reg, mtype_reg, Assembler::notEqual, Assembler::pn, wrong_method_type); } @@ -3295,9 +3357,7 @@ // pointers to allow age to be placed into low bits assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg); - cmp(temp_reg, markOopDesc::biased_lock_pattern); - brx(Assembler::notEqual, false, Assembler::pn, cas_label); - delayed()->nop(); + cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label); load_klass(obj_reg, temp_reg); ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); @@ -3364,8 +3424,7 @@ brx(Assembler::notEqual, true, Assembler::pn, *slow_case); delayed()->nop(); } - br(Assembler::always, false, Assembler::pt, done); - delayed()->nop(); + ba_short(done); bind(try_rebias); // At this point we know the epoch has expired, meaning that the @@ -3393,8 +3452,7 @@ brx(Assembler::notEqual, true, Assembler::pn, *slow_case); delayed()->nop(); } - br(Assembler::always, false, Assembler::pt, done); - delayed()->nop(); + ba_short(done); bind(try_revoke_bias); // The prototype mark in the klass doesn't have the bias bit set any @@ -3445,7 +3503,7 @@ // Solaris/SPARC's "as". 
Another apt name would be cas_ptr() void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) { - casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()) ; + casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); } @@ -3486,9 +3544,9 @@ } if (EmitSync & 1) { - mov (3, Rscratch) ; - st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); - cmp (SP, G0) ; + mov(3, Rscratch); + st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); + cmp(SP, G0); return ; } @@ -3529,7 +3587,7 @@ assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); andcc(Rscratch, 0xfffff003, Rscratch); st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); - bind (done) ; + bind (done); return ; } @@ -3538,7 +3596,7 @@ if (EmitSync & 256) { Label IsInflated ; - ld_ptr (mark_addr, Rmark); // fetch obj->mark + ld_ptr(mark_addr, Rmark); // fetch obj->mark // Triage: biased, stack-locked, neutral, inflated if (try_bias) { biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); @@ -3549,49 +3607,49 @@ // Store mark into displaced mark field in the on-stack basic-lock "box" // Critically, this must happen before the CAS // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty. - st_ptr (Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); - andcc (Rmark, 2, G0) ; - brx (Assembler::notZero, false, Assembler::pn, IsInflated) ; - delayed() -> + st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); + andcc(Rmark, 2, G0); + brx(Assembler::notZero, false, Assembler::pn, IsInflated); + delayed()-> // Try stack-lock acquisition. // Beware: the 1st instruction is in a delay slot - mov (Rbox, Rscratch); - or3 (Rmark, markOopDesc::unlocked_value, Rmark); - assert (mark_addr.disp() == 0, "cas must take a zero displacement"); - casn (mark_addr.base(), Rmark, Rscratch) ; - cmp (Rmark, Rscratch); - brx (Assembler::equal, false, Assembler::pt, done); + mov(Rbox, Rscratch); + or3(Rmark, markOopDesc::unlocked_value, Rmark); + assert(mark_addr.disp() == 0, "cas must take a zero displacement"); + casn(mark_addr.base(), Rmark, Rscratch); + cmp(Rmark, Rscratch); + brx(Assembler::equal, false, Assembler::pt, done); delayed()->sub(Rscratch, SP, Rscratch); // Stack-lock attempt failed - check for recursive stack-lock. // See the comments below about how we might remove this case. #ifdef _LP64 - sub (Rscratch, STACK_BIAS, Rscratch); + sub(Rscratch, STACK_BIAS, Rscratch); #endif assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); - andcc (Rscratch, 0xfffff003, Rscratch); - br (Assembler::always, false, Assembler::pt, done) ; - delayed()-> st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); - - bind (IsInflated) ; + andcc(Rscratch, 0xfffff003, Rscratch); + br(Assembler::always, false, Assembler::pt, done); + delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); + + bind(IsInflated); if (EmitSync & 64) { // If m->owner != null goto IsLocked // Pessimistic form: Test-and-CAS vs CAS // The optimistic form avoids RTS->RTO cache line upgrades. 
- ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); - andcc (Rscratch, Rscratch, G0) ; - brx (Assembler::notZero, false, Assembler::pn, done) ; - delayed()->nop() ; + ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); + andcc(Rscratch, Rscratch, G0); + brx(Assembler::notZero, false, Assembler::pn, done); + delayed()->nop(); // m->owner == null : it's unlocked. } // Try to CAS m->owner from null to Self // Invariant: if we acquire the lock then _recursions should be 0. - add (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ; - mov (G2_thread, Rscratch) ; - casn (Rmark, G0, Rscratch) ; - cmp (Rscratch, G0) ; + add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); + mov(G2_thread, Rscratch); + casn(Rmark, G0, Rscratch); + cmp(Rscratch, G0); // Intentional fall-through into done } else { // Aggressively avoid the Store-before-CAS penalty @@ -3599,9 +3657,9 @@ Label IsInflated, Recursive ; // Anticipate CAS -- Avoid RTS->RTO upgrade -// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ; - - ld_ptr (mark_addr, Rmark); // fetch obj->mark +// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads); + + ld_ptr(mark_addr, Rmark); // fetch obj->mark // Triage: biased, stack-locked, neutral, inflated if (try_bias) { @@ -3609,8 +3667,8 @@ // Invariant: if control reaches this point in the emitted stream // then Rmark has not been modified. } - andcc (Rmark, 2, G0) ; - brx (Assembler::notZero, false, Assembler::pn, IsInflated) ; + andcc(Rmark, 2, G0); + brx(Assembler::notZero, false, Assembler::pn, IsInflated); delayed()-> // Beware - dangling delay-slot // Try stack-lock acquisition. @@ -3620,23 +3678,21 @@ // ST obj->mark = box -- overwrite transient 0 value // This presumes TSO, of course. - mov (0, Rscratch) ; - or3 (Rmark, markOopDesc::unlocked_value, Rmark); - assert (mark_addr.disp() == 0, "cas must take a zero displacement"); - casn (mark_addr.base(), Rmark, Rscratch) ; -// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ; - cmp (Rscratch, Rmark) ; - brx (Assembler::notZero, false, Assembler::pn, Recursive) ; - delayed() -> - st_ptr (Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); + mov(0, Rscratch); + or3(Rmark, markOopDesc::unlocked_value, Rmark); + assert(mark_addr.disp() == 0, "cas must take a zero displacement"); + casn(mark_addr.base(), Rmark, Rscratch); +// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads); + cmp(Rscratch, Rmark); + brx(Assembler::notZero, false, Assembler::pn, Recursive); + delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); if (counters != NULL) { cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); } - br (Assembler::always, false, Assembler::pt, done); - delayed() -> - st_ptr (Rbox, mark_addr) ; - - bind (Recursive) ; + ba(done); + delayed()->st_ptr(Rbox, mark_addr); + + bind(Recursive); // Stack-lock attempt failed - check for recursive stack-lock. // Tests show that we can remove the recursive case with no impact // on refworkload 0.83. If we need to reduce the size of the code @@ -3653,49 +3709,48 @@ // RScratch contains the fetched obj->mark value from the failed CASN. 
#ifdef _LP64 - sub (Rscratch, STACK_BIAS, Rscratch); + sub(Rscratch, STACK_BIAS, Rscratch); #endif sub(Rscratch, SP, Rscratch); assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); - andcc (Rscratch, 0xfffff003, Rscratch); + andcc(Rscratch, 0xfffff003, Rscratch); if (counters != NULL) { // Accounting needs the Rscratch register - st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); + st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); - br (Assembler::always, false, Assembler::pt, done) ; - delayed()->nop() ; + ba_short(done); } else { - br (Assembler::always, false, Assembler::pt, done) ; - delayed()-> st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); + ba(done); + delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); } - bind (IsInflated) ; + bind (IsInflated); if (EmitSync & 64) { // If m->owner != null goto IsLocked // Test-and-CAS vs CAS // Pessimistic form avoids futile (doomed) CAS attempts // The optimistic form avoids RTS->RTO cache line upgrades. - ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); - andcc (Rscratch, Rscratch, G0) ; - brx (Assembler::notZero, false, Assembler::pn, done) ; - delayed()->nop() ; + ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); + andcc(Rscratch, Rscratch, G0); + brx(Assembler::notZero, false, Assembler::pn, done); + delayed()->nop(); // m->owner == null : it's unlocked. } // Try to CAS m->owner from null to Self // Invariant: if we acquire the lock then _recursions should be 0. - add (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ; - mov (G2_thread, Rscratch) ; - casn (Rmark, G0, Rscratch) ; - cmp (Rscratch, G0) ; + add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); + mov(G2_thread, Rscratch); + casn(Rmark, G0, Rscratch); + cmp(Rscratch, G0); // ST box->displaced_header = NonZero. // Any non-zero value suffices: // unused_mark(), G2_thread, RBox, RScratch, rsp, etc. 
- st_ptr (Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes()); + st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes()); // Intentional fall-through into done } - bind (done) ; + bind (done); } void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark, @@ -3706,7 +3761,7 @@ Label done ; if (EmitSync & 4) { - cmp (SP, G0) ; + cmp(SP, G0); return ; } @@ -3717,18 +3772,16 @@ // Test first if it is a fast recursive unlock ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark); - cmp(Rmark, G0); - brx(Assembler::equal, false, Assembler::pt, done); - delayed()->nop(); + br_null_short(Rmark, Assembler::pt, done); // Check if it is still a light weight lock, this is is true if we see // the stack address of the basicLock in the markOop of the object assert(mark_addr.disp() == 0, "cas must take a zero displacement"); casx_under_lock(mark_addr.base(), Rbox, Rmark, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); - br (Assembler::always, false, Assembler::pt, done); + ba(done); delayed()->cmp(Rbox, Rmark); - bind (done) ; + bind(done); return ; } @@ -3743,14 +3796,14 @@ biased_locking_exit(mark_addr, Rscratch, done); } - ld_ptr (Roop, oopDesc::mark_offset_in_bytes(), Rmark) ; - ld_ptr (Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch); - andcc (Rscratch, Rscratch, G0); - brx (Assembler::zero, false, Assembler::pn, done); - delayed()-> nop() ; // consider: relocate fetch of mark, above, into this DS - andcc (Rmark, 2, G0) ; - brx (Assembler::zero, false, Assembler::pt, LStacked) ; - delayed()-> nop() ; + ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark); + ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch); + andcc(Rscratch, Rscratch, G0); + brx(Assembler::zero, false, Assembler::pn, done); + delayed()->nop(); // consider: relocate fetch of mark, above, into this DS + andcc(Rmark, 2, G0); + brx(Assembler::zero, false, Assembler::pt, LStacked); + delayed()->nop(); // It's inflated // Conceptually we need a #loadstore|#storestore "release" MEMBAR before @@ -3761,48 +3814,45 @@ // Note that we use 1-0 locking by default for the inflated case. We // close the resultant (and rare) race by having contented threads in // monitorenter periodically poll _owner. 
- ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); - ld_ptr (Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox); - xor3 (Rscratch, G2_thread, Rscratch) ; - orcc (Rbox, Rscratch, Rbox) ; - brx (Assembler::notZero, false, Assembler::pn, done) ; + ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); + ld_ptr(Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox); + xor3(Rscratch, G2_thread, Rscratch); + orcc(Rbox, Rscratch, Rbox); + brx(Assembler::notZero, false, Assembler::pn, done); delayed()-> - ld_ptr (Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch); - ld_ptr (Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox); - orcc (Rbox, Rscratch, G0) ; + ld_ptr(Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch); + ld_ptr(Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox); + orcc(Rbox, Rscratch, G0); if (EmitSync & 65536) { Label LSucc ; - brx (Assembler::notZero, false, Assembler::pn, LSucc) ; - delayed()->nop() ; - br (Assembler::always, false, Assembler::pt, done) ; - delayed()-> - st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); - - bind (LSucc) ; - st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); - if (os::is_MP()) { membar (StoreLoad) ; } - ld_ptr (Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch); - andcc (Rscratch, Rscratch, G0) ; - brx (Assembler::notZero, false, Assembler::pt, done) ; - delayed()-> andcc (G0, G0, G0) ; - add (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ; - mov (G2_thread, Rscratch) ; - casn (Rmark, G0, Rscratch) ; - cmp (Rscratch, G0) ; + brx(Assembler::notZero, false, Assembler::pn, LSucc); + delayed()->nop(); + ba(done); + delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); + + bind(LSucc); + st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); + if (os::is_MP()) { membar (StoreLoad); } + ld_ptr(Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch); + andcc(Rscratch, Rscratch, G0); + brx(Assembler::notZero, false, Assembler::pt, done); + delayed()->andcc(G0, G0, G0); + add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); + mov(G2_thread, Rscratch); + casn(Rmark, G0, Rscratch); // invert icc.zf and goto done - brx (Assembler::notZero, false, Assembler::pt, done) ; - delayed() -> cmp (G0, G0) ; - br (Assembler::always, false, Assembler::pt, done); - delayed() -> cmp (G0, 1) ; + br_notnull(Rscratch, false, Assembler::pt, done); + delayed()->cmp(G0, G0); + ba(done); + delayed()->cmp(G0, 1); } else { - brx (Assembler::notZero, false, Assembler::pn, done) ; - delayed()->nop() ; - br (Assembler::always, false, Assembler::pt, done) ; - delayed()-> - st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); + brx(Assembler::notZero, false, Assembler::pn, done); + delayed()->nop(); + ba(done); + delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); } - bind (LStacked) ; + bind (LStacked); // Consider: we could replace the expensive CAS in the exit // path with a simple ST of the displaced mark value fetched from // the on-stack basiclock box. That admits a race where a thread T2 @@ -3831,11 +3881,11 @@ // A prototype implementation showed excellent results, although // the scavenger and timeout code was rather involved. - casn (mark_addr.base(), Rbox, Rscratch) ; - cmp (Rbox, Rscratch); + casn(mark_addr.base(), Rbox, Rscratch); + cmp(Rbox, Rscratch); // Intentional fall through into done ... 
- bind (done) ; + bind(done); } @@ -3891,9 +3941,7 @@ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1); ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2); or3(t1, t2, t3); - cmp(t1, t2); - br(Assembler::greaterEqual, false, Assembler::pn, next); - delayed()->nop(); + cmp_and_br_short(t1, t2, Assembler::greaterEqual, Assembler::pn, next); stop("assert(top >= start)"); should_not_reach_here(); @@ -3901,17 +3949,13 @@ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1); ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2); or3(t3, t2, t3); - cmp(t1, t2); - br(Assembler::lessEqual, false, Assembler::pn, next2); - delayed()->nop(); + cmp_and_br_short(t1, t2, Assembler::lessEqual, Assembler::pn, next2); stop("assert(top <= end)"); should_not_reach_here(); bind(next2); and3(t3, MinObjAlignmentInBytesMask, t3); - cmp(t3, 0); - br(Assembler::lessEqual, false, Assembler::pn, ok); - delayed()->nop(); + cmp_and_br_short(t3, 0, Assembler::lessEqual, Assembler::pn, ok); stop("assert(aligned)"); should_not_reach_here(); @@ -3937,8 +3981,7 @@ if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { // No allocation in the shared eden. - br(Assembler::always, false, Assembler::pt, slow_case); - delayed()->nop(); + ba_short(slow_case); } else { // get eden boundaries // note: we need both top & top_addr! @@ -4072,8 +4115,7 @@ if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { // No allocation in the shared eden. - br(Assembler::always, false, Assembler::pt, slow_case); - delayed()->nop(); + ba_short(slow_case); } ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top); @@ -4098,8 +4140,7 @@ add(t2, 1, t2); stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset())); } - br(Assembler::always, false, Assembler::pt, try_eden); - delayed()->nop(); + ba_short(try_eden); bind(discard_tlab); if (TLABStats) { @@ -4115,8 +4156,7 @@ // if tlab is currently allocated (top or end != null) then // fill [top, end + alignment_reserve) with array object - br_null(top, false, Assembler::pn, do_refill); - delayed()->nop(); + br_null_short(top, Assembler::pn, do_refill); set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2); st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word @@ -4151,9 +4191,7 @@ Label ok; ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2); sll_ptr(t2, LogHeapWordSize, t2); - cmp(t1, t2); - br(Assembler::equal, false, Assembler::pt, ok); - delayed()->nop(); + cmp_and_br_short(t1, t2, Assembler::equal, Assembler::pt, ok); stop("assert(t1 == tlab_size)"); should_not_reach_here(); @@ -4164,8 +4202,7 @@ sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top); st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset())); verify_tlab(); - br(Assembler::always, false, Assembler::pt, retry); - delayed()->nop(); + ba_short(retry); } void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, @@ -4290,12 +4327,15 @@ BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize); CodeBuffer buf(bb); MacroAssembler masm(&buf); - address start = masm.pc(); + +#define __ masm. + + address start = __ pc(); Register pre_val; Label refill, restart; if (with_frame) { - masm.save_frame(0); + __ save_frame(0); pre_val = I0; // Was O0 before the save. 
} else { pre_val = O0; @@ -4310,57 +4350,59 @@ in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t), "check sizes in assembly below"); - masm.bind(restart); - masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0); - - masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill); + __ bind(restart); + __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0); + + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill); // If the branch is taken, no harm in executing this in the delay slot. - masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); - masm.sub(L0, oopSize, L0); - - masm.st_ptr(pre_val, L1, L0); // [_buf + index] := I0 + __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); + __ sub(L0, oopSize, L0); + + __ st_ptr(pre_val, L1, L0); // [_buf + index] := I0 if (!with_frame) { // Use return-from-leaf - masm.retl(); - masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset); + __ retl(); + __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset); } else { // Not delayed. - masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset); + __ st_ptr(L0, G2_thread, satb_q_index_byte_offset); } if (with_frame) { - masm.ret(); - masm.delayed()->restore(); + __ ret(); + __ delayed()->restore(); } - masm.bind(refill); + __ bind(refill); address handle_zero = CAST_FROM_FN_PTR(address, &SATBMarkQueueSet::handle_zero_index_for_thread); // This should be rare enough that we can afford to save all the // scratch registers that the calling context might be using. - masm.mov(G1_scratch, L0); - masm.mov(G3_scratch, L1); - masm.mov(G4, L2); + __ mov(G1_scratch, L0); + __ mov(G3_scratch, L1); + __ mov(G4, L2); // We need the value of O0 above (for the write into the buffer), so we // save and restore it. - masm.mov(O0, L3); + __ mov(O0, L3); // Since the call will overwrite O7, we save and restore that, as well. - masm.mov(O7, L4); - masm.call_VM_leaf(L5, handle_zero, G2_thread); - masm.mov(L0, G1_scratch); - masm.mov(L1, G3_scratch); - masm.mov(L2, G4); - masm.mov(L3, O0); - masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); - masm.delayed()->mov(L4, O7); + __ mov(O7, L4); + __ call_VM_leaf(L5, handle_zero, G2_thread); + __ mov(L0, G1_scratch); + __ mov(L1, G3_scratch); + __ mov(L2, G4); + __ mov(L3, O0); + __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); + __ delayed()->mov(L4, O7); if (with_frame) { satb_log_enqueue_with_frame = start; - satb_log_enqueue_with_frame_end = masm.pc(); + satb_log_enqueue_with_frame_end = __ pc(); } else { satb_log_enqueue_frameless = start; - satb_log_enqueue_frameless_end = masm.pc(); + satb_log_enqueue_frameless_end = __ pc(); } + +#undef __ } static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) { @@ -4426,7 +4468,7 @@ // Check on whether to annul. br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); - delayed() -> nop(); + delayed()->nop(); // Do we need to load the previous value? if (obj != noreg) { @@ -4450,7 +4492,7 @@ // Is the previous value null? // Check on whether to annul. br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered); - delayed() -> nop(); + delayed()->nop(); // OK, it's not filtered, so we'll need to call enqueue. In the normal // case, pre_val will be a scratch G-reg, but there are some cases in @@ -4518,79 +4560,83 @@ BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2); CodeBuffer buf(bb); MacroAssembler masm(&buf); - address start = masm.pc(); +#define __ masm. 
+ address start = __ pc(); Label not_already_dirty, restart, refill; #ifdef _LP64 - masm.srlx(O0, CardTableModRefBS::card_shift, O0); + __ srlx(O0, CardTableModRefBS::card_shift, O0); #else - masm.srl(O0, CardTableModRefBS::card_shift, O0); + __ srl(O0, CardTableModRefBS::card_shift, O0); #endif AddressLiteral addrlit(byte_map_base); - masm.set(addrlit, O1); // O1 := - masm.ldub(O0, O1, O2); // O2 := [O0 + O1] - - masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, + __ set(addrlit, O1); // O1 := + __ ldub(O0, O1, O2); // O2 := [O0 + O1] + + __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, O2, not_already_dirty); // Get O1 + O2 into a reg by itself -- useful in the take-the-branch // case, harmless if not. - masm.delayed()->add(O0, O1, O3); + __ delayed()->add(O0, O1, O3); // We didn't take the branch, so we're already dirty: return. // Use return-from-leaf - masm.retl(); - masm.delayed()->nop(); + __ retl(); + __ delayed()->nop(); // Not dirty. - masm.bind(not_already_dirty); + __ bind(not_already_dirty); // First, dirty it. - masm.stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). + __ stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). int dirty_card_q_index_byte_offset = in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_index()); int dirty_card_q_buf_byte_offset = in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_buf()); - masm.bind(restart); - masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); - - masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, + __ bind(restart); + __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); + + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill); // If the branch is taken, no harm in executing this in the delay slot. - masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); - masm.sub(L0, oopSize, L0); - - masm.st_ptr(O3, L1, L0); // [_buf + index] := I0 + __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); + __ sub(L0, oopSize, L0); + + __ st_ptr(O3, L1, L0); // [_buf + index] := I0 // Use return-from-leaf - masm.retl(); - masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset); - - masm.bind(refill); + __ retl(); + __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset); + + __ bind(refill); address handle_zero = CAST_FROM_FN_PTR(address, &DirtyCardQueueSet::handle_zero_index_for_thread); // This should be rare enough that we can afford to save all the // scratch registers that the calling context might be using. - masm.mov(G1_scratch, L3); - masm.mov(G3_scratch, L5); + __ mov(G1_scratch, L3); + __ mov(G3_scratch, L5); // We need the value of O3 above (for the write into the buffer), so we // save and restore it. - masm.mov(O3, L6); + __ mov(O3, L6); // Since the call will overwrite O7, we save and restore that, as well. 
- masm.mov(O7, L4); - - masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread); - masm.mov(L3, G1_scratch); - masm.mov(L5, G3_scratch); - masm.mov(L6, O3); - masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); - masm.delayed()->mov(L4, O7); + __ mov(O7, L4); + + __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread); + __ mov(L3, G1_scratch); + __ mov(L5, G3_scratch); + __ mov(L6, O3); + __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); + __ delayed()->mov(L4, O7); dirty_card_log_enqueue = start; - dirty_card_log_enqueue_end = masm.pc(); + dirty_card_log_enqueue_end = __ pc(); // XXX Should have a guarantee here about not going off the end! // Does it already do so? Do an experiment... + +#undef __ + } static inline void @@ -4903,7 +4949,7 @@ delayed()->mov(G0, result); // not equal // only one char ? - br_on_reg_cond(rc_z, true, Assembler::pn, limit, Ldone); + cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn); delayed()->add(G0, 1, result); // zero-length arrays are equal // word by word compare, dont't need alignment check diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/assembler_sparc.hpp --- a/src/cpu/sparc/vm/assembler_sparc.hpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp Thu Jul 21 11:25:07 2011 -0700 @@ -761,7 +761,7 @@ mwtos_opf = 0x119 }; - enum RCondition { rc_z = 1, rc_lez = 2, rc_lz = 3, rc_nz = 5, rc_gz = 6, rc_gez = 7 }; + enum RCondition { rc_z = 1, rc_lez = 2, rc_lz = 3, rc_nz = 5, rc_gz = 6, rc_gez = 7, rc_last = rc_gez }; enum Condition { // for FBfcc & FBPfcc instruction @@ -866,9 +866,18 @@ return is_simm(d, nbits + 2); } + address target_distance(Label& L) { + // Assembler::target(L) should be called only when + // a branch instruction is emitted since non-bound + // labels record current pc() as a branch address. + if (L.is_bound()) return target(L); + // Return current address for non-bound labels. + return pc(); + } + // test if label is in simm16 range in words (wdisp16). bool is_in_wdisp16_range(Label& L) { - return is_in_wdisp_range(target(L), pc(), 16); + return is_in_wdisp_range(target_distance(L), pc(), 16); } // test if the distance between two addresses fits in simm30 range in words static bool is_in_wdisp30_range(address a, address b) { @@ -975,6 +984,20 @@ static int sx( int i) { return u_field(i, 12, 12); } // shift x=1 means 64-bit static int opf( int x) { return u_field(x, 13, 5); } + static bool is_cbcond( int x ) { + return (VM_Version::has_cbcond() && (inv_cond(x) > rc_last) && + inv_op(x) == branch_op && inv_op2(x) == bpr_op2); + } + static bool is_cxb( int x ) { + assert(is_cbcond(x), "wrong instruction"); + return (x & (1<<21)) != 0; + } + static int cond_cbcond( int x) { return u_field((((x & 8)<<1) + 8 + (x & 7)), 29, 25); } + static int inv_cond_cbcond(int x) { + assert(is_cbcond(x), "wrong instruction"); + return inv_u_field(x, 27, 25) | (inv_u_field(x, 29, 29)<<3); + } + static int opf_cc( CC c, bool useFloat ) { return u_field((useFloat ? 0 : 4) + c, 13, 11); } static int mov_cc( CC c, bool useFloat ) { return u_field(useFloat ? 
0 : 1, 18, 18) | u_field(c, 12, 11); }
@@ -1026,6 +1049,26 @@
     return r;
   }
 
+  // compute inverse of wdisp10
+  static intptr_t inv_wdisp10(int x, intptr_t pos) {
+    assert(is_cbcond(x), "wrong instruction");
+    int lo = inv_u_field(x, 12, 5);
+    int hi = (x >> 19) & 3;
+    if (hi >= 2) hi |= ~1;
+    return (((hi << 8) | lo) << 2) + pos;
+  }
+
+  // word offset for cbcond, 8 bits at [B12,B5], 2 bits at [B20,B19]
+  static int wdisp10(intptr_t x, intptr_t off) {
+    assert(VM_Version::has_cbcond(), "This CPU does not have CBCOND instruction");
+    intptr_t xx = x - off;
+    assert_signed_word_disp_range(xx, 10);
+    int r =  ( ( (xx >>  2   ) & ((1 << 8) - 1) ) <<  5 )
+           | ( ( (xx >> (2+8)) & 3              ) << 19 );
+    // Have to fake cbcond instruction to pass assert in inv_wdisp10()
+    assert(inv_wdisp10((r | op(branch_op) | cond_cbcond(rc_last+1) | op2(bpr_op2)), off) == x, "inverse is not inverse");
+    return r;
+  }
 
   // word displacement in low-order nbits bits
@@ -1138,6 +1181,24 @@
 #endif
   }
 
+  // cbcond instructions should not be generated one after another
+  bool cbcond_before() {
+    if (offset() == 0) return false; // it is the first instruction
+    int x = *(int*)(intptr_t(pc()) - 4); // previous instruction
+    return is_cbcond(x);
+  }
+
+  void no_cbcond_before() {
+    assert(offset() == 0 || !cbcond_before(), "cbcond should not follow another cbcond");
+  }
+
+  bool use_cbcond(Label& L) {
+    if (!UseCBCond || cbcond_before()) return false;
+    intptr_t x = intptr_t(target_distance(L)) - intptr_t(pc());
+    assert( (x & 3) == 0, "not word aligned");
+    return is_simm(x, 12);
+  }
+
 public:
   // Tells assembler you know that next instruction is delayed
   Assembler* delayed() {
@@ -1181,10 +1242,11 @@
   void addccc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
   void addccc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
+
   // pp 136
 
-  inline void bpr( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none );
-  inline void bpr( RCondition c, bool a, Predict p, Register s1, Label& L);
+  inline void bpr(RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none);
+  inline void bpr(RCondition c, bool a, Predict p, Register s1, Label& L);
 
 protected: // use MacroAssembler::br instead
@@ -1198,8 +1260,6 @@
   inline void fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
   inline void fbp( Condition c, bool a, CC cc, Predict p, Label& L );
 
- public:
-
   // pp 144
 
   inline void br( Condition c, bool a, address d, relocInfo::relocType rt = relocInfo::none );
@@ -1215,11 +1275,17 @@
   inline void cb( Condition c, bool a, address d, relocInfo::relocType rt = relocInfo::none );
   inline void cb( Condition c, bool a, Label& L );
 
+  // compare and branch
+  inline void cbcond(Condition c, CC cc, Register s1, Register s2, Label& L);
+  inline void cbcond(Condition c, CC cc, Register s1, int simm5, Label& L);
+
   // pp 149
 
   inline void call( address d, relocInfo::relocType rt = relocInfo::runtime_call_type );
   inline void call( Label& L, relocInfo::relocType rt = relocInfo::runtime_call_type );
 
+ public:
+
   // pp 150
 
   // These instructions compare the contents of s2 with the contents of
@@ -1862,8 +1928,8 @@
   inline void fb( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
   inline void fb( Condition c, bool a, Predict p, Label& L );
 
-  // compares register with zero and branches (V9 and V8 instructions)
-  void br_zero( Condition c, bool a, Predict p, Register s1, Label& L);
+  // compares register with zero (32 bit) and branches (V9 and V8 instructions)
+  void cmp_zero_and_br( Condition c, Register s1, Label& L, bool a = false, Predict p = pn );
   // Compares a pointer register with zero and branches on (not)null.
   // Does a test & branch on 32-bit systems and a register-branch on 64-bit.
   void br_null ( Register s1, bool a, Predict p, Label& L );
@@ -1875,6 +1941,26 @@
   void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none );
   void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, Label& L);
 
+  //
+  // Compare registers and branch with nop in delay slot or cbcond without delay slot.
+  //
+  // ATTENTION: use these instructions with caution because the cbcond instruction
+  // has a very short range: 512 instructions (2Kbyte).
+
+  // Compare integer (32 bit) values (icc only).
+  void cmp_and_br_short(Register s1, Register s2, Condition c, Predict p, Label& L);
+  void cmp_and_br_short(Register s1, int simm13a, Condition c, Predict p, Label& L);
+  // Platform-dependent version for pointer compare (icc on !LP64 and xcc on LP64).
+  void cmp_and_brx_short(Register s1, Register s2, Condition c, Predict p, Label& L);
+  void cmp_and_brx_short(Register s1, int simm13a, Condition c, Predict p, Label& L);
+
+  // Short branch versions for comparing a pointer with zero.
+  void br_null_short   ( Register s1, Predict p, Label& L );
+  void br_notnull_short( Register s1, Predict p, Label& L );
+
+  // unconditional short branch
+  void ba_short(Label& L);
+
   inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
   inline void bp( Condition c, bool a, CC cc, Predict p, Label& L );
@@ -1882,8 +1968,8 @@
   inline void brx( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
   inline void brx( Condition c, bool a, Predict p, Label& L );
 
-  // unconditional short branch
-  inline void ba( bool a, Label& L );
+  // unconditional branch
+  inline void ba( Label& L );
 
   // Branch that tests fp condition codes
   inline void fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
@@ -2167,7 +2253,6 @@
   inline void stbool(Register d, const Address& a) { stb(d, a); }
   inline void ldbool(const Address& a, Register d) { ldsb(a, d); }
 
-  inline void tstbool( Register s ) { tst(s); }
   inline void movbool( bool boolconst, Register d) { mov( (int) boolconst, d); }
 
   // klass oop manipulations if compressed
@@ -2469,8 +2554,7 @@
                            Label* L_success,
                            Label* L_failure,
                            Label* L_slow_path,
-                           RegisterOrConstant super_check_offset = RegisterOrConstant(-1),
-                           Register instanceof_hack = noreg);
+                           RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
 
   // The rest of the type check; must be wired to a corresponding fast path.
   // It does not repeat the fast path logic, so don't use it standalone.
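For reference, the cbcond displacement helpers introduced above are easy to sanity-check in isolation. The following is a minimal standalone C++ sketch, not HotSpot code: the two functions are hypothetical stand-ins for Assembler::wdisp10() and Assembler::inv_wdisp10() that keep only the field arithmetic (bits [9:2] of the byte displacement land at instruction bits [12:5], bits [11:10] at bits [20:19]) and drop the is_cbcond()/VM_Version asserts. Like the originals, it assumes two's-complement shift behavior for negative values.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Pack a signed 10-bit word displacement (target - pc, byte addresses)
    // into the cbcond fields: 8 low bits at [12:5], 2 high bits at [20:19].
    static int wdisp10(intptr_t target, intptr_t pc) {
      intptr_t xx = target - pc;
      assert((xx & 3) == 0);              // branch targets are word aligned
      assert(xx >= -2048 && xx <= 2044);  // +/-512 instructions (2Kbyte)
      return (int)((((xx >> 2) & 0xff) << 5) | (((xx >> 10) & 3) << 19));
    }

    // Recover the target address from the packed fields.
    static intptr_t inv_wdisp10(int x, intptr_t pc) {
      int lo = (x >> 5) & 0xff;
      int hi = (x >> 19) & 3;
      if (hi >= 2) hi |= ~1;              // sign-extend the two high bits
      return (((intptr_t)((hi << 8) | lo)) << 2) + pc;
    }

    int main() {
      const intptr_t pc = 0x100000;
      for (intptr_t d = -2048; d <= 2044; d += 4) {  // full cbcond reach
        assert(inv_wdisp10(wdisp10(pc + d, pc), pc) == pc + d);
      }
      printf("wdisp10 round-trip OK over +/-512 instructions\n");
      return 0;
    }

That +/-2048-byte window is the same limit use_cbcond() enforces via is_simm(x, 12) on the byte displacement, which is why MacroAssembler::cmp_and_br_short() and friends fall back to the classic compare, branch and delay-slot nop sequence for more distant labels.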
diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/assembler_sparc.inline.hpp --- a/src/cpu/sparc/vm/assembler_sparc.inline.hpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.inline.hpp Thu Jul 21 11:25:07 2011 -0700 @@ -80,32 +80,36 @@ inline void Assembler::add(Register s1, int simm13a, Register d, relocInfo::relocType rtype ) { emit_data( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rtype ); } inline void Assembler::add(Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { emit_data( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec ); } -inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt ) { v9_only(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(bpr_op2) | wdisp16(intptr_t(d), intptr_t(pc())) | predict(p) | rs1(s1), rt); has_delay_slot(); } +inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt ) { v9_only(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(bpr_op2) | wdisp16(intptr_t(d), intptr_t(pc())) | predict(p) | rs1(s1), rt); has_delay_slot(); } inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, Label& L) { bpr( c, a, p, s1, target(L)); } -inline void Assembler::fb( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(fb_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt); has_delay_slot(); } +inline void Assembler::fb( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(fb_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt); has_delay_slot(); } inline void Assembler::fb( Condition c, bool a, Label& L ) { fb(c, a, target(L)); } -inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(fbp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt); has_delay_slot(); } +inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(fbp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt); has_delay_slot(); } inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, Label& L ) { fbp(c, a, cc, p, target(L)); } -inline void Assembler::cb( Condition c, bool a, address d, relocInfo::relocType rt ) { v8_only(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(cb_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt); has_delay_slot(); } +inline void Assembler::cb( Condition c, bool a, address d, relocInfo::relocType rt ) { v8_only(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(cb_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt); has_delay_slot(); } inline void Assembler::cb( Condition c, bool a, Label& L ) { cb(c, a, target(L)); } -inline void Assembler::br( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(br_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt); has_delay_slot(); } +inline void Assembler::br( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(br_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt); has_delay_slot(); } inline 
void Assembler::br( Condition c, bool a, Label& L ) { br(c, a, target(L)); } -inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(bp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt); has_delay_slot(); } +inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(bp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt); has_delay_slot(); } inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, Label& L ) { bp(c, a, cc, p, target(L)); } -inline void Assembler::call( address d, relocInfo::relocType rt ) { emit_data( op(call_op) | wdisp(intptr_t(d), intptr_t(pc()), 30), rt); has_delay_slot(); assert(rt != relocInfo::virtual_call_type, "must use virtual_call_Relocation::spec"); } +// compare and branch +inline void Assembler::cbcond(Condition c, CC cc, Register s1, Register s2, Label& L) { cti(); no_cbcond_before(); emit_data(op(branch_op) | cond_cbcond(c) | op2(bpr_op2) | branchcc(cc) | wdisp10(intptr_t(target(L)), intptr_t(pc())) | rs1(s1) | rs2(s2)); } +inline void Assembler::cbcond(Condition c, CC cc, Register s1, int simm5, Label& L) { cti(); no_cbcond_before(); emit_data(op(branch_op) | cond_cbcond(c) | op2(bpr_op2) | branchcc(cc) | wdisp10(intptr_t(target(L)), intptr_t(pc())) | rs1(s1) | immed(true) | simm(simm5, 5)); } + +inline void Assembler::call( address d, relocInfo::relocType rt ) { cti(); emit_data( op(call_op) | wdisp(intptr_t(d), intptr_t(pc()), 30), rt); has_delay_slot(); assert(rt != relocInfo::virtual_call_type, "must use virtual_call_Relocation::spec"); } inline void Assembler::call( Label& L, relocInfo::relocType rt ) { call( target(L), rt); } inline void Assembler::flush( Register s1, Register s2) { emit_long( op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2)); } inline void Assembler::flush( Register s1, int simm13a) { emit_data( op(arith_op) | op3(flush_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); } -inline void Assembler::jmpl( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | rs2(s2)); has_delay_slot(); } -inline void Assembler::jmpl( Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { emit_data( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec); has_delay_slot(); } +inline void Assembler::jmpl( Register s1, Register s2, Register d ) { cti(); emit_long( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | rs2(s2)); has_delay_slot(); } +inline void Assembler::jmpl( Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { cti(); emit_data( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec); has_delay_slot(); } inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, RegisterOrConstant s2, FloatRegister d) { if (s2.is_register()) ldf(w, s1, s2.as_register(), d); @@ -240,8 +244,8 @@ inline void Assembler::prefetch(const Address& a, PrefetchFcn f, int offset) { v9_only(); relocate(a.rspec(offset)); prefetch(a.base(), a.disp() + offset, f); } -inline void Assembler::rett( Register s1, Register s2 ) { emit_long( op(arith_op) | op3(rett_op3) | rs1(s1) | rs2(s2)); has_delay_slot(); } -inline void Assembler::rett( Register s1, int simm13a, relocInfo::relocType rt) { emit_data( op(arith_op) | op3(rett_op3) | 
rs1(s1) | immed(true) | simm(simm13a, 13), rt); has_delay_slot(); } +inline void Assembler::rett( Register s1, Register s2 ) { cti(); emit_long( op(arith_op) | op3(rett_op3) | rs1(s1) | rs2(s2)); has_delay_slot(); } +inline void Assembler::rett( Register s1, int simm13a, relocInfo::relocType rt) { cti(); emit_data( op(arith_op) | op3(rett_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rt); has_delay_slot(); } inline void Assembler::sethi( int imm22a, Register d, RelocationHolder const& rspec ) { emit_data( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(imm22a), rspec); } @@ -557,8 +561,8 @@ brx(c, a, p, target(L)); } -inline void MacroAssembler::ba( bool a, Label& L ) { - br(always, a, pt, L); +inline void MacroAssembler::ba( Label& L ) { + br(always, false, pt, L); } // Warning: V9 only functions diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp --- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -303,9 +303,7 @@ assert(_oop_index >= 0, "must have oop index"); __ load_heap_oop(_obj, java_lang_Class::klass_offset_in_bytes(), G3); __ ld_ptr(G3, instanceKlass::init_thread_offset_in_bytes() + sizeof(klassOopDesc), G3); - __ cmp(G2_thread, G3); - __ br(Assembler::notEqual, false, Assembler::pn, call_patch); - __ delayed()->nop(); + __ cmp_and_brx_short(G2_thread, G3, Assembler::notEqual, Assembler::pn, call_patch); // load_klass patches may execute the patched code before it's // copied back into place so we need to jump back into the main diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp --- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -217,9 +217,7 @@ { Label L; __ ld_ptr(OSR_buf, slot_offset + 1*BytesPerWord, O7); - __ cmp(G0, O7); - __ br(Assembler::notEqual, false, Assembler::pt, L); - __ delayed()->nop(); + __ cmp_and_br_short(O7, G0, Assembler::notEqual, Assembler::pt, L); __ stop("locked object is NULL"); __ bind(L); } @@ -2096,10 +2094,10 @@ __ xor3(O0, -1, tmp); __ sub(length, tmp, length); __ add(src_pos, tmp, src_pos); - __ br_zero(Assembler::less, false, Assembler::pn, O0, *stub->entry()); + __ cmp_zero_and_br(Assembler::less, O0, *stub->entry()); __ delayed()->add(dst_pos, tmp, dst_pos); } else { - __ br_zero(Assembler::less, false, Assembler::pn, O0, *stub->entry()); + __ cmp_zero_and_br(Assembler::less, O0, *stub->entry()); __ delayed()->nop(); } __ bind(*stub->continuation()); @@ -2123,22 +2121,19 @@ if (flags & LIR_OpArrayCopy::src_pos_positive_check) { // test src_pos register - __ tst(src_pos); - __ br(Assembler::less, false, Assembler::pn, *stub->entry()); + __ cmp_zero_and_br(Assembler::less, src_pos, *stub->entry()); __ delayed()->nop(); } if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { // test dst_pos register - __ tst(dst_pos); - __ br(Assembler::less, false, Assembler::pn, *stub->entry()); + __ cmp_zero_and_br(Assembler::less, dst_pos, *stub->entry()); __ delayed()->nop(); } if (flags & LIR_OpArrayCopy::length_positive_check) { // make sure length isn't negative - __ tst(length); - __ br(Assembler::less, false, Assembler::pn, *stub->entry()); + __ cmp_zero_and_br(Assembler::less, length, *stub->entry()); __ delayed()->nop(); } @@ -2261,8 +2256,7 @@ #ifndef PRODUCT if (PrintC1Statistics) { Label failed; - __ br_notnull(O0, false, Assembler::pn, failed); - __ delayed()->nop(); + __ 
br_notnull_short(O0, Assembler::pn, failed); __ inc_counter((address)&Runtime1::_arraycopy_checkcast_cnt, G1, G3); __ bind(failed); } @@ -2314,9 +2308,7 @@ __ br(Assembler::notEqual, false, Assembler::pn, halt); // load the raw value of the src klass. __ delayed()->lduw(src, oopDesc::klass_offset_in_bytes(), tmp2); - __ cmp(tmp, tmp2); - __ br(Assembler::equal, false, Assembler::pn, known_ok); - __ delayed()->nop(); + __ cmp_and_br_short(tmp, tmp2, Assembler::equal, Assembler::pn, known_ok); } else { __ cmp(tmp, tmp2); __ br(Assembler::equal, false, Assembler::pn, known_ok); @@ -2330,9 +2322,7 @@ __ cmp(tmp, tmp2); __ brx(Assembler::notEqual, false, Assembler::pn, halt); __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), tmp2); - __ cmp(tmp, tmp2); - __ brx(Assembler::equal, false, Assembler::pn, known_ok); - __ delayed()->nop(); + __ cmp_and_brx_short(tmp, tmp2, Assembler::equal, Assembler::pn, known_ok); } else { __ cmp(tmp, tmp2); __ brx(Assembler::equal, false, Assembler::pn, known_ok); @@ -2530,15 +2520,13 @@ mdo_offset_bias); __ ld_ptr(receiver_addr, tmp1); __ verify_oop(tmp1); - __ cmp(recv, tmp1); - __ brx(Assembler::notEqual, false, Assembler::pt, next_test); - __ delayed()->nop(); + __ cmp_and_brx_short(recv, tmp1, Assembler::notEqual, Assembler::pt, next_test); Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) - mdo_offset_bias); __ ld_ptr(data_addr, tmp1); __ add(tmp1, DataLayout::counter_increment, tmp1); __ st_ptr(tmp1, data_addr); - __ ba(false, *update_done); + __ ba(*update_done); __ delayed()->nop(); __ bind(next_test); } @@ -2549,13 +2537,12 @@ Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) - mdo_offset_bias); __ ld_ptr(recv_addr, tmp1); - __ br_notnull(tmp1, false, Assembler::pt, next_test); - __ delayed()->nop(); + __ br_notnull_short(tmp1, Assembler::pt, next_test); __ st_ptr(recv, recv_addr); __ set(DataLayout::counter_increment, tmp1); __ st_ptr(tmp1, mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) - mdo_offset_bias); - __ ba(false, *update_done); + __ ba(*update_done); __ delayed()->nop(); __ bind(next_test); } @@ -2601,8 +2588,7 @@ setup_md_access(method, op->profiled_bci(), md, data, mdo_offset_bias); Label not_null; - __ br_notnull(obj, false, Assembler::pn, not_null); - __ delayed()->nop(); + __ br_notnull_short(obj, Assembler::pn, not_null); Register mdo = k_RInfo; Register data_val = Rtmp1; jobject2reg(md->constant_encoding(), mdo); @@ -2614,7 +2600,7 @@ __ ldub(flags_addr, data_val); __ or3(data_val, BitData::null_seen_byte_constant(), data_val); __ stb(data_val, flags_addr); - __ ba(false, *obj_is_null); + __ ba(*obj_is_null); __ delayed()->nop(); __ bind(not_null); } else { @@ -2682,7 +2668,7 @@ __ load_klass(obj, recv); type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, success); // Jump over the failure case - __ ba(false, *success); + __ ba(*success); __ delayed()->nop(); // Cast failure case __ bind(profile_cast_failure); @@ -2695,10 +2681,10 @@ __ ld_ptr(data_addr, tmp1); __ sub(tmp1, DataLayout::counter_increment, tmp1); __ st_ptr(tmp1, data_addr); - __ ba(false, *failure); + __ ba(*failure); __ delayed()->nop(); } - __ ba(false, *success); + __ ba(*success); __ delayed()->nop(); } @@ -2728,8 +2714,7 @@ if (op->should_profile()) { Label not_null; - __ br_notnull(value, false, Assembler::pn, not_null); - __ delayed()->nop(); + __ br_notnull_short(value, Assembler::pn, not_null); Register mdo = k_RInfo; 
Register data_val = Rtmp1; jobject2reg(md->constant_encoding(), mdo); @@ -2741,12 +2726,10 @@ __ ldub(flags_addr, data_val); __ or3(data_val, BitData::null_seen_byte_constant(), data_val); __ stb(data_val, flags_addr); - __ ba(false, done); - __ delayed()->nop(); + __ ba_short(done); __ bind(not_null); } else { - __ br_null(value, false, Assembler::pn, done); - __ delayed()->nop(); + __ br_null_short(value, Assembler::pn, done); } add_debug_info_for_null_check_here(op->info_for_exception()); __ load_klass(array, k_RInfo); @@ -2777,8 +2760,7 @@ } __ load_klass(value, recv); type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &done); - __ ba(false, done); - __ delayed()->nop(); + __ ba_short(done); // Cast failure case __ bind(profile_cast_failure); jobject2reg(md->constant_encoding(), mdo); @@ -2790,7 +2772,7 @@ __ ld_ptr(data_addr, tmp1); __ sub(tmp1, DataLayout::counter_increment, tmp1); __ st_ptr(tmp1, data_addr); - __ ba(false, *stub->entry()); + __ ba(*stub->entry()); __ delayed()->nop(); } __ bind(done); @@ -2808,8 +2790,7 @@ emit_typecheck_helper(op, &success, &failure, &failure); __ bind(failure); __ set(0, dst); - __ ba(false, done); - __ delayed()->nop(); + __ ba_short(done); __ bind(success); __ set(1, dst); __ bind(done); diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp --- a/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -41,9 +41,7 @@ // Note: needs more testing of out-of-line vs. inline slow case verify_oop(receiver); load_klass(receiver, temp_reg); - cmp(temp_reg, iCache); - brx(Assembler::equal, true, Assembler::pt, L); - delayed()->nop(); + cmp_and_brx_short(temp_reg, iCache, Assembler::equal, Assembler::pt, L); AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub()); jump_to(ic_miss, temp_reg); delayed()->nop(); @@ -142,8 +140,7 @@ } // Test first it it is a fast recursive unlock ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark); - br_null(Rmark, false, Assembler::pt, done); - delayed()->nop(); + br_null_short(Rmark, Assembler::pt, done); if (!UseBiasedLocking) { // load object ld_ptr(Rbox, BasicObjectLock::obj_offset_in_bytes(), Roop); @@ -231,7 +228,7 @@ if (!is_simm13(obj_size * wordSize)) { // would need to use extra register to load // object size => go the slow case for now - br(Assembler::always, false, Assembler::pt, slow_case); + ba(slow_case); delayed()->nop(); return; } @@ -257,12 +254,10 @@ Label ok; ld(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), t1); if (var_size_in_bytes != noreg) { - cmp(t1, var_size_in_bytes); + cmp_and_brx_short(t1, var_size_in_bytes, Assembler::equal, Assembler::pt, ok); } else { - cmp(t1, con_size_in_bytes); + cmp_and_brx_short(t1, con_size_in_bytes, Assembler::equal, Assembler::pt, ok); } - brx(Assembler::equal, false, Assembler::pt, ok); - delayed()->nop(); stop("bad size in initialize_object"); should_not_reach_here(); @@ -387,8 +382,7 @@ void C1_MacroAssembler::verify_not_null_oop(Register r) { Label not_null; - br_notnull(r, false, Assembler::pt, not_null); - delayed()->nop(); + br_notnull_short(r, Assembler::pt, not_null); stop("non-null oop required"); bind(not_null); if (!VerifyOops) return; diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/c1_Runtime1_sparc.cpp --- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Thu Jul 21 11:25:07 
2011 -0700 @@ -71,8 +71,7 @@ { Label L; Address exception_addr(G2_thread, Thread::pending_exception_offset()); ld_ptr(exception_addr, Gtemp); - br_null(Gtemp, false, pt, L); - delayed()->nop(); + br_null_short(Gtemp, pt, L); Address vm_result_addr(G2_thread, JavaThread::vm_result_offset()); st_ptr(G0, vm_result_addr); Address vm_result_addr_2(G2_thread, JavaThread::vm_result_2_offset()); @@ -333,9 +332,7 @@ assert(deopt_blob != NULL, "deoptimization blob must have been created"); Label no_deopt; - __ tst(O0); - __ brx(Assembler::equal, false, Assembler::pt, no_deopt); - __ delayed()->nop(); + __ br_null_short(O0, Assembler::pt, no_deopt); // return to the deoptimization handler entry for unpacking and rexecute // if we simply returned the we'd deopt as if any call we patched had just @@ -402,18 +399,15 @@ if (id == fast_new_instance_init_check_id) { // make sure the klass is initialized __ ld(G5_klass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), G3_t1); - __ cmp(G3_t1, instanceKlass::fully_initialized); - __ br(Assembler::notEqual, false, Assembler::pn, slow_path); - __ delayed()->nop(); + __ cmp_and_br_short(G3_t1, instanceKlass::fully_initialized, Assembler::notEqual, Assembler::pn, slow_path); } #ifdef ASSERT // assert object can be fast path allocated { Label ok, not_ok; __ ld(G5_klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), G1_obj_size); - __ cmp(G1_obj_size, 0); // make sure it's an instance (LH > 0) - __ br(Assembler::lessEqual, false, Assembler::pn, not_ok); - __ delayed()->nop(); + // make sure it's an instance (LH > 0) + __ cmp_and_br_short(G1_obj_size, 0, Assembler::lessEqual, Assembler::pn, not_ok); __ btst(Klass::_lh_instance_slow_path_bit, G1_obj_size); __ br(Assembler::zero, false, Assembler::pn, ok); __ delayed()->nop(); @@ -501,9 +495,7 @@ int tag = ((id == new_type_array_id) ? Klass::_lh_array_tag_type_value : Klass::_lh_array_tag_obj_value); - __ cmp(G3_t1, tag); - __ brx(Assembler::equal, false, Assembler::pt, ok); - __ delayed()->nop(); + __ cmp_and_brx_short(G3_t1, tag, Assembler::equal, Assembler::pt, ok); __ stop("assert(is an array klass)"); __ should_not_reach_here(); __ bind(ok); @@ -519,9 +511,7 @@ // check that array length is small enough for fast path __ set(C1_MacroAssembler::max_array_allocation_length, G3_t1); - __ cmp(G4_length, G3_t1); - __ br(Assembler::greaterUnsigned, false, Assembler::pn, slow_path); - __ delayed()->nop(); + __ cmp_and_br_short(G4_length, G3_t1, Assembler::greaterUnsigned, Assembler::pn, slow_path); // if we got here then the TLAB allocation failed, so try // refilling the TLAB or allocating directly from eden. 
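All of these `_short` conversions rely on the branch target being close enough for cbcond. A simplified sketch of the reach test, assuming wdisp10 is a signed 10-bit word displacement as the encoding functions above suggest (the helper name cbcond_fits is invented; in the real code this decision sits behind use_cbcond):

#include <stdint.h>
#include <assert.h>

// cbcond carries a signed 10-bit word displacement (wdisp10); note also that
// its immediate operand form only accepts a 5-bit signed constant (simm5).
static bool cbcond_fits(intptr_t branch_pc, intptr_t target_pc) {
  intptr_t disp = target_pc - branch_pc;  // byte displacement to the target
  assert((disp & 3) == 0 && "branch targets are instruction aligned");
  intptr_t wdisp = disp >> 2;             // word (instruction) displacement
  return -512 <= wdisp && wdisp < 512;    // signed 10-bit range: +-2 Kbytes
}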
diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/cppInterpreter_sparc.cpp --- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -544,7 +544,7 @@ // Generate regular method entry __ bind(slow_path); - __ ba(false, fast_accessor_slow_entry_path); + __ ba(fast_accessor_slow_entry_path); __ delayed()->nop(); return entry; } @@ -719,8 +719,7 @@ Address exception_addr(G2_thread, 0, in_bytes(Thread::pending_exception_offset())); __ ld_ptr(exception_addr, G3_scratch); - __ br_notnull(G3_scratch, false, Assembler::pn, pending_exception_present); - __ delayed()->nop(); + __ br_notnull_short(G3_scratch, Assembler::pn, pending_exception_present); __ ld_ptr(Address(G5_method, 0, in_bytes(methodOopDesc::signature_handler_offset())), G3_scratch); __ bind(L); } @@ -1292,7 +1291,7 @@ deopt_frame_manager_return_atos = __ pc(); // O0/O1 live - __ ba(false, return_from_deopt_common); + __ ba(return_from_deopt_common); __ delayed()->set(AbstractInterpreter::BasicType_as_index(T_OBJECT), L3_scratch); // Result stub address array index @@ -1300,14 +1299,14 @@ deopt_frame_manager_return_btos = __ pc(); // O0/O1 live - __ ba(false, return_from_deopt_common); + __ ba(return_from_deopt_common); __ delayed()->set(AbstractInterpreter::BasicType_as_index(T_BOOLEAN), L3_scratch); // Result stub address array index // deopt needs to jump to here to enter the interpreter (return a result) deopt_frame_manager_return_itos = __ pc(); // O0/O1 live - __ ba(false, return_from_deopt_common); + __ ba(return_from_deopt_common); __ delayed()->set(AbstractInterpreter::BasicType_as_index(T_INT), L3_scratch); // Result stub address array index // deopt needs to jump to here to enter the interpreter (return a result) @@ -1327,21 +1326,21 @@ __ srlx(G1,32,O0); #endif /* !_LP64 && COMPILER2 */ // O0/O1 live - __ ba(false, return_from_deopt_common); + __ ba(return_from_deopt_common); __ delayed()->set(AbstractInterpreter::BasicType_as_index(T_LONG), L3_scratch); // Result stub address array index // deopt needs to jump to here to enter the interpreter (return a result) deopt_frame_manager_return_ftos = __ pc(); // O0/O1 live - __ ba(false, return_from_deopt_common); + __ ba(return_from_deopt_common); __ delayed()->set(AbstractInterpreter::BasicType_as_index(T_FLOAT), L3_scratch); // Result stub address array index // deopt needs to jump to here to enter the interpreter (return a result) deopt_frame_manager_return_dtos = __ pc(); // O0/O1 live - __ ba(false, return_from_deopt_common); + __ ba(return_from_deopt_common); __ delayed()->set(AbstractInterpreter::BasicType_as_index(T_DOUBLE), L3_scratch); // Result stub address array index // deopt needs to jump to here to enter the interpreter (return a result) @@ -1398,7 +1397,7 @@ __ ld_ptr(STATE(_stack), L1_scratch); // Get current stack top __ sub(L1_scratch, entry_size, L1_scratch); __ st_ptr(L1_scratch, STATE(_stack)); - __ ba(false, entry); + __ ba(entry); __ delayed()->add(L1_scratch, wordSize, L1_scratch); // first real entry (undo prepush) // 2. 
move expression stack @@ -1651,7 +1650,7 @@ __ set((int)BytecodeInterpreter::got_monitors, L1_scratch); VALIDATE_STATE(G3_scratch, 5); - __ ba(false, call_interpreter); + __ ba(call_interpreter); __ delayed()->st(L1_scratch, STATE(_msg)); // uncommon trap needs to jump to here to enter the interpreter (re-execute current bytecode) @@ -1659,7 +1658,7 @@ // QQQ what message do we send - __ ba(false, call_interpreter); + __ ba(call_interpreter); __ delayed()->ld_ptr(STATE(_frame_bottom), SP); // restore to full stack frame //============================================================================= @@ -1675,7 +1674,7 @@ // ready to resume the interpreter __ set((int)BytecodeInterpreter::deopt_resume, L1_scratch); - __ ba(false, call_interpreter); + __ ba(call_interpreter); __ delayed()->st(L1_scratch, STATE(_msg)); // Current frame has caught an exception we need to dispatch to the @@ -1763,7 +1762,7 @@ // L1_scratch points to top of stack (prepushed) - __ ba(false, resume_interpreter); + __ ba(resume_interpreter); __ delayed()->mov(L1_scratch, O1); // An exception is being caught on return to a vanilla interpreter frame. @@ -1773,7 +1772,7 @@ __ ld_ptr(STATE(_frame_bottom), SP); // restore to full stack frame __ ld_ptr(STATE(_stack_base), O1); // empty java expression stack - __ ba(false, resume_interpreter); + __ ba(resume_interpreter); __ delayed()->sub(O1, wordSize, O1); // account for prepush // Return from interpreted method we return result appropriate to the caller (i.e. "recursive" @@ -1852,7 +1851,7 @@ __ set((int)BytecodeInterpreter::method_resume, L1_scratch); __ st(L1_scratch, STATE(_msg)); - __ ba(false, call_interpreter_2); + __ ba(call_interpreter_2); __ delayed()->st_ptr(O1, STATE(_stack)); @@ -1867,8 +1866,8 @@ __ cmp(Gtmp1, O7); // returning to interpreter? __ brx(Assembler::equal, true, Assembler::pt, re_dispatch); // yep __ delayed()->nop(); - __ ba(false, re_dispatch); - __ delayed()->mov(G0, prevState); // initial entry + __ ba(re_dispatch); + __ delayed()->mov(G0, prevState); // initial entry } @@ -2031,8 +2030,8 @@ __ brx(Assembler::zero, false, Assembler::pt, unwind_and_forward); __ delayed()->nop(); - __ ld_ptr(STATE(_locals), O1); // get result of popping callee's args - __ ba(false, unwind_recursive_activation); + __ ld_ptr(STATE(_locals), O1); // get result of popping callee's args + __ ba(unwind_recursive_activation); __ delayed()->nop(); interpreter_frame_manager = entry_point; diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/interp_masm_sparc.cpp --- a/src/cpu/sparc/vm/interp_masm_sparc.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -236,17 +236,13 @@ Label L; Register thr_state = G3_scratch; ld_ptr(G2_thread, JavaThread::jvmti_thread_state_offset(), thr_state); - tst(thr_state); - br(zero, false, pt, L); // if (thread->jvmti_thread_state() == NULL) exit; - delayed()->nop(); + br_null_short(thr_state, pt, L); // if (thread->jvmti_thread_state() == NULL) exit; // Initiate earlyret handling only if it is not already being processed. // If the flag has the earlyret_processing bit set, it means that this code // is called *during* earlyret handling - we don't want to reenter. 
ld(thr_state, JvmtiThreadState::earlyret_state_offset(), G4_scratch); - cmp(G4_scratch, JvmtiThreadState::earlyret_pending); - br(Assembler::notEqual, false, pt, L); - delayed()->nop(); + cmp_and_br_short(G4_scratch, JvmtiThreadState::earlyret_pending, Assembler::notEqual, pt, L); // Call Interpreter::remove_activation_early_entry() to get the address of the // same-named entrypoint in the generated interpreter code @@ -566,9 +562,7 @@ #ifdef _LP64 sub(Rtemp, STACK_BIAS, Rtemp); // Bias Rtemp before cmp to FP #endif - cmp(Rtemp, FP); - brx(Assembler::greaterUnsigned, false, Assembler::pn, Bad); - delayed()->nop(); + cmp_and_brx_short(Rtemp, FP, Assembler::greaterUnsigned, Assembler::pn, Bad); // Saved SP must not be ridiculously below current SP. size_t maxstack = MAX2(JavaThread::stack_size_at_create(), (size_t) 4*K*K); @@ -577,12 +571,9 @@ #ifdef _LP64 add(Rtemp, STACK_BIAS, Rtemp); // Unbias Rtemp before cmp to Rsp #endif - cmp(Rsp, Rtemp); - brx(Assembler::lessUnsigned, false, Assembler::pn, Bad); - delayed()->nop(); - - br(Assembler::always, false, Assembler::pn, OK); - delayed()->nop(); + cmp_and_brx_short(Rsp, Rtemp, Assembler::lessUnsigned, Assembler::pn, Bad); + + ba_short(OK); bind(Bad); stop("on return to interpreted call, restored SP is corrupted"); @@ -630,8 +621,7 @@ const Address interp_only(G2_thread, JavaThread::interp_only_mode_offset()); ld(interp_only, scratch); - tst(scratch); - br(Assembler::notZero, true, Assembler::pn, skip_compiled_code); + cmp_zero_and_br(Assembler::notZero, scratch, skip_compiled_code, true, Assembler::pn); delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), target); bind(skip_compiled_code); } @@ -641,8 +631,7 @@ #ifdef ASSERT { Label ok; - br_notnull(target, false, Assembler::pt, ok); - delayed()->nop(); + br_notnull_short(target, Assembler::pt, ok); stop("null entry point"); bind(ok); } @@ -982,8 +971,7 @@ // Don't unlock anything if the _do_not_unlock_if_synchronized flag // is set. - tstbool(G1_scratch); - br(Assembler::notZero, false, pn, no_unlock); + cmp_zero_and_br(Assembler::notZero, G1_scratch, no_unlock); delayed()->nop(); // BasicObjectLock will be first in list, since this is a synchronized method. 
However, need @@ -997,8 +985,7 @@ add( top_most_monitor(), O1 ); ld_ptr(O1, BasicObjectLock::obj_offset_in_bytes(), G3_scratch); - br_notnull(G3_scratch, false, pt, unlock); - delayed()->nop(); + br_notnull_short(G3_scratch, pt, unlock); if (throw_monitor_exception) { // Entry already unlocked need to throw an exception @@ -1011,8 +998,7 @@ if (install_monitor_exception) { MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception)); } - ba(false, unlocked); - delayed()->nop(); + ba_short(unlocked); } bind(unlock); @@ -1037,15 +1023,13 @@ add(top_most_monitor(), Rmptr, delta); { Label L; // ensure that Rmptr starts out above (or at) Rlimit - cmp(Rmptr, Rlimit); - brx(Assembler::greaterEqualUnsigned, false, pn, L); - delayed()->nop(); + cmp_and_brx_short(Rmptr, Rlimit, Assembler::greaterEqualUnsigned, pn, L); stop("monitor stack has negative size"); bind(L); } #endif bind(restart); - ba(false, entry); + ba(entry); delayed()-> add(top_most_monitor(), Rmptr, delta); // points to current entry, starting with bottom-most entry @@ -1061,8 +1045,7 @@ if (install_monitor_exception) { MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception)); } - ba(false, restart); - delayed()->nop(); + ba_short(restart); } bind(loop); @@ -1073,9 +1056,7 @@ #ifdef ASSERT { Label L; // ensure that Rmptr has not somehow stepped below Rlimit - cmp(Rmptr, Rlimit); - brx(Assembler::greaterEqualUnsigned, false, pn, L); - delayed()->nop(); + cmp_and_brx_short(Rmptr, Rlimit, Assembler::greaterEqualUnsigned, pn, L); stop("ran off the end of the monitor stack"); bind(L); } @@ -1196,9 +1177,7 @@ (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); // if the compare and exchange succeeded we are done (we saw an unlocked object) - cmp(mark_reg, temp_reg); - brx(Assembler::equal, true, Assembler::pt, done); - delayed()->nop(); + cmp_and_brx_short(mark_reg, temp_reg, Assembler::equal, Assembler::pt, done); // We did not see an unlocked object so try the fast recursive case @@ -1324,13 +1303,7 @@ void InterpreterMacroAssembler::test_method_data_pointer(Label& zero_continue) { assert(ProfileInterpreter, "must be profiling interpreter"); -#ifdef _LP64 - bpr(Assembler::rc_z, false, Assembler::pn, ImethodDataPtr, zero_continue); -#else - tst(ImethodDataPtr); - br(Assembler::zero, false, Assembler::pn, zero_continue); -#endif - delayed()->nop(); + br_null_short(ImethodDataPtr, Assembler::pn, zero_continue); } void InterpreterMacroAssembler::verify_method_data_pointer() { @@ -1376,31 +1349,18 @@ Label done; // if no method data exists, and the counter is high enough, make one -#ifdef _LP64 - bpr(Assembler::rc_nz, false, Assembler::pn, ImethodDataPtr, done); -#else - tst(ImethodDataPtr); - br(Assembler::notZero, false, Assembler::pn, done); -#endif + br_notnull_short(ImethodDataPtr, Assembler::pn, done); // Test to see if we should create a method data oop AddressLiteral profile_limit((address) &InvocationCounter::InterpreterProfileLimit); -#ifdef _LP64 - delayed()->nop(); sethi(profile_limit, Rtmp); -#else - delayed()->sethi(profile_limit, Rtmp); -#endif ld(Rtmp, profile_limit.low10(), Rtmp); - cmp(invocation_count, Rtmp); - br(Assembler::lessUnsigned, false, Assembler::pn, profile_continue); - delayed()->nop(); + cmp_and_br_short(invocation_count, Rtmp, Assembler::lessUnsigned, Assembler::pn, profile_continue); // Build it now. 
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); set_method_data_pointer_for_bcp(); - ba(false, profile_continue); - delayed()->nop(); + ba_short(profile_continue); bind(done); } @@ -1632,13 +1592,10 @@ Label skip_receiver_profile; if (receiver_can_be_null) { Label not_null; - tst(receiver); - brx(Assembler::notZero, false, Assembler::pt, not_null); - delayed()->nop(); + br_notnull_short(receiver, Assembler::pt, not_null); // We are making a call. Increment the count for null receiver. increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch); - ba(false, skip_receiver_profile); - delayed()->nop(); + ba_short(skip_receiver_profile); bind(not_null); } @@ -1682,8 +1639,7 @@ // The receiver is receiver[n]. Increment count[n]. int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); increment_mdp_data_at(count_offset, scratch); - ba(false, done); - delayed()->nop(); + ba_short(done); bind(next_test); if (test_for_null_also) { @@ -1697,8 +1653,7 @@ // Receiver did not match any saved receiver and there is no empty row for it. // Increment total counter to indicate polymorphic case. increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch); - ba(false, done); - delayed()->nop(); + ba_short(done); bind(found_null); } else { brx(Assembler::notZero, false, Assembler::pt, done); @@ -1729,8 +1684,7 @@ mov(DataLayout::counter_increment, scratch); set_mdp_data_at(count_offset, scratch); if (start_row > 0) { - ba(false, done); - delayed()->nop(); + ba_short(done); } } @@ -1772,8 +1726,7 @@ // The method data pointer needs to be updated to reflect the new target. update_mdp_by_offset(in_bytes(RetData::bci_displacement_offset(row)), scratch); - ba(false, profile_continue); - delayed()->nop(); + ba_short(profile_continue); bind(next_test); } @@ -1922,8 +1875,8 @@ // untested("monitor stack expansion"); compute_stack_base(Rtemp); - ba( false, start_copying ); - delayed()->cmp( Rtemp, Rlimit); // done? duplicated below + ba(start_copying); + delayed()->cmp(Rtemp, Rlimit); // done? duplicated below // note: must copy from low memory upwards // On entry to loop, @@ -2010,9 +1963,7 @@ // untested("reg area corruption"); add(Rindex, offset, Rscratch); add(Rlimit, 64 + STACK_BIAS, Rscratch1); - cmp(Rscratch, Rscratch1); - brx(Assembler::greaterEqualUnsigned, false, pn, L); - delayed()->nop(); + cmp_and_brx_short(Rscratch, Rscratch1, Assembler::greaterEqualUnsigned, pn, L); stop("regsave area is being clobbered"); bind(L); } @@ -2174,9 +2125,7 @@ AddressLiteral limit(&InvocationCounter::InterpreterBackwardBranchLimit); load_contents(limit, Rtmp); - cmp(backedge_count, Rtmp); - br(Assembler::lessUnsigned, false, Assembler::pt, did_not_overflow); - delayed()->nop(); + cmp_and_br_short(backedge_count, Rtmp, Assembler::lessUnsigned, Assembler::pt, did_not_overflow); // When ProfileInterpreter is on, the backedge_count comes from the // methodDataOop, which value does not get reset on the call to @@ -2196,15 +2145,11 @@ // Was an OSR adapter generated? // O0 = osr nmethod - tst(O0); - brx(Assembler::zero, false, Assembler::pn, overflow_with_error); - delayed()->nop(); + br_null_short(O0, Assembler::pn, overflow_with_error); // Has the nmethod been invalidated already? 
ld(O0, nmethod::entry_bci_offset(), O2); - cmp(O2, InvalidOSREntryBci); - br(Assembler::equal, false, Assembler::pn, overflow_with_error); - delayed()->nop(); + cmp_and_br_short(O2, InvalidOSREntryBci, Assembler::equal, Assembler::pn, overflow_with_error); // migrate the interpreter frame off of the stack @@ -2270,8 +2215,7 @@ mov(reg, Rtmp); const int log2_bytecode_size_limit = 16; srl(Rtmp, log2_bytecode_size_limit, Rtmp); - br_notnull( Rtmp, false, pt, test ); - delayed()->nop(); + br_notnull_short( Rtmp, pt, test ); // %%% should use call_VM_leaf here? save_frame_and_mov(0, Lmethod, O0, reg, O1); @@ -2320,9 +2264,7 @@ Register temp_reg = O5; const Address interp_only(G2_thread, JavaThread::interp_only_mode_offset()); ld(interp_only, temp_reg); - tst(temp_reg); - br(zero, false, pt, L); - delayed()->nop(); + cmp_and_br_short(temp_reg, 0, equal, pt, L); call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry)); bind(L); } @@ -2372,9 +2314,7 @@ Register temp_reg = O5; const Address interp_only(G2_thread, JavaThread::interp_only_mode_offset()); ld(interp_only, temp_reg); - tst(temp_reg); - br(zero, false, pt, L); - delayed()->nop(); + cmp_and_br_short(temp_reg, 0, equal, pt, L); // Note: frame::interpreter_frame_result has a dependency on how the // method result is saved across the call to post_method_exit. For diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/interpreter_sparc.cpp --- a/src/cpu/sparc/vm/interpreter_sparc.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/interpreter_sparc.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -191,22 +191,19 @@ // Optimization, see if there are any more args and get out prior to checking // all 16 float registers. My guess is that this is rare. // If is_register is false, then we are done the first six integer args. 
- __ tst(G4_scratch); - __ brx(Assembler::zero, false, Assembler::pt, done); - __ delayed()->nop(); - + __ br_null_short(G4_scratch, Assembler::pt, done); } - __ ba(false, NextArg); + __ ba(NextArg); __ delayed()->srl( G4_scratch, 2, G4_scratch ); __ bind(LoadFloatArg); __ ldf( FloatRegisterImpl::S, a, ldarg.as_float_register(), 4); - __ ba(false, NextArg); + __ ba(NextArg); __ delayed()->srl( G4_scratch, 2, G4_scratch ); __ bind(LoadDoubleArg); __ ldf( FloatRegisterImpl::D, a, ldarg.as_double_register() ); - __ ba(false, NextArg); + __ ba(NextArg); __ delayed()->srl( G4_scratch, 2, G4_scratch ); __ bind(NextArg); @@ -234,8 +231,7 @@ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), O2, O2, true); // returns verified_entry_point or NULL // we ignore it in any case - __ ba(false, Lcontinue); - __ delayed()->nop(); + __ ba_short(Lcontinue); } diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/methodHandles_sparc.cpp --- a/src/cpu/sparc/vm/methodHandles_sparc.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -287,9 +287,7 @@ BLOCK_COMMENT("verify_clean {"); // Magic numbers must check out: __ set((int32_t) MAGIC_NUMBER_1, O7_temp); - __ cmp(O7_temp, L0_magic_number_1); - __ br(Assembler::equal, false, Assembler::pt, L_ok_1); - __ delayed()->nop(); + __ cmp_and_br_short(O7_temp, L0_magic_number_1, Assembler::equal, Assembler::pt, L_ok_1); __ stop("damaged ricochet frame: MAGIC_NUMBER_1 not found"); __ BIND(L_ok_1); @@ -301,9 +299,7 @@ #else Register FP_temp = FP; #endif - __ cmp(L4_saved_args_base, FP_temp); - __ br(Assembler::greaterEqualUnsigned, false, Assembler::pt, L_ok_2); - __ delayed()->nop(); + __ cmp_and_brx_short(L4_saved_args_base, FP_temp, Assembler::greaterEqualUnsigned, Assembler::pt, L_ok_2); __ stop("damaged ricochet frame: L4 < FP"); __ BIND(L_ok_2); @@ -316,15 +312,11 @@ __ BIND(L_ok_3); extract_conversion_dest_type(_masm, L5_conversion, O7_temp); - __ cmp(O7_temp, T_VOID); - __ br(Assembler::equal, false, Assembler::pt, L_ok_4); - __ delayed()->nop(); + __ cmp_and_br_short(O7_temp, T_VOID, Assembler::equal, Assembler::pt, L_ok_4); extract_conversion_vminfo(_masm, L5_conversion, O5_temp); __ ld_ptr(L4_saved_args_base, __ argument_offset(O5_temp, O5_temp), O7_temp); assert(__ is_simm13(RETURN_VALUE_PLACEHOLDER), "must be simm13"); - __ cmp(O7_temp, (int32_t) RETURN_VALUE_PLACEHOLDER); - __ brx(Assembler::equal, false, Assembler::pt, L_ok_4); - __ delayed()->nop(); + __ cmp_and_brx_short(O7_temp, (int32_t) RETURN_VALUE_PLACEHOLDER, Assembler::equal, Assembler::pt, L_ok_4); __ stop("damaged ricochet frame: RETURN_VALUE_PLACEHOLDER not found"); __ BIND(L_ok_4); BLOCK_COMMENT("} verify_clean"); @@ -363,9 +355,7 @@ if (VerifyMethodHandles) { Label L_ok, L_bad; int32_t stack_move_limit = 0x0800; // extra-large - __ cmp(stack_move_reg, stack_move_limit); - __ br(Assembler::greaterEqual, false, Assembler::pn, L_bad); - __ delayed()->nop(); + __ cmp_and_br_short(stack_move_reg, stack_move_limit, Assembler::greaterEqual, Assembler::pn, L_bad); __ cmp(stack_move_reg, -stack_move_limit); __ br(Assembler::greater, false, Assembler::pt, L_ok); __ delayed()->nop(); @@ -401,13 +391,9 @@ // Verify that argslot lies within (Gargs, FP]. 
Label L_ok, L_bad; BLOCK_COMMENT("verify_argslot {"); + __ cmp_and_brx_short(Gargs, argslot_reg, Assembler::greaterUnsigned, Assembler::pn, L_bad); __ add(FP, STACK_BIAS, temp_reg); // STACK_BIAS is zero on !_LP64 - __ cmp(argslot_reg, temp_reg); - __ brx(Assembler::greaterUnsigned, false, Assembler::pn, L_bad); - __ delayed()->nop(); - __ cmp(Gargs, argslot_reg); - __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, L_ok); - __ delayed()->nop(); + __ cmp_and_brx_short(argslot_reg, temp_reg, Assembler::lessEqualUnsigned, Assembler::pt, L_ok); __ BIND(L_bad); __ stop(error_message); __ BIND(L_ok); @@ -434,14 +420,10 @@ } __ add(arg_slot_base_reg, __ argument_offset(arg_slots, temp_reg), temp_reg); __ add(FP, STACK_BIAS, temp2_reg); // STACK_BIAS is zero on !_LP64 - __ cmp(temp_reg, temp2_reg); - __ brx(Assembler::greaterUnsigned, false, Assembler::pn, L_bad); - __ delayed()->nop(); + __ cmp_and_brx_short(temp_reg, temp2_reg, Assembler::greaterUnsigned, Assembler::pn, L_bad); // Gargs points to the first word so adjust by BytesPerWord __ add(arg_slot_base_reg, BytesPerWord, temp_reg); - __ cmp(Gargs, temp_reg); - __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, L_ok); - __ delayed()->nop(); + __ cmp_and_brx_short(Gargs, temp_reg, Assembler::lessEqualUnsigned, Assembler::pt, L_ok); __ BIND(L_bad); __ stop(error_message); __ BIND(L_ok); @@ -502,21 +484,16 @@ Label L_ok, L_bad; BLOCK_COMMENT("verify_klass {"); __ verify_oop(obj_reg); - __ br_null(obj_reg, false, Assembler::pn, L_bad); - __ delayed()->nop(); + __ br_null_short(obj_reg, Assembler::pn, L_bad); __ load_klass(obj_reg, temp_reg); __ set(ExternalAddress(klass_addr), temp2_reg); __ ld_ptr(Address(temp2_reg, 0), temp2_reg); - __ cmp(temp_reg, temp2_reg); - __ brx(Assembler::equal, false, Assembler::pt, L_ok); - __ delayed()->nop(); + __ cmp_and_brx_short(temp_reg, temp2_reg, Assembler::equal, Assembler::pt, L_ok); intptr_t super_check_offset = klass->super_check_offset(); __ ld_ptr(Address(temp_reg, super_check_offset), temp_reg); __ set(ExternalAddress(klass_addr), temp2_reg); __ ld_ptr(Address(temp2_reg, 0), temp2_reg); - __ cmp(temp_reg, temp2_reg); - __ brx(Assembler::equal, false, Assembler::pt, L_ok); - __ delayed()->nop(); + __ cmp_and_brx_short(temp_reg, temp2_reg, Assembler::equal, Assembler::pt, L_ok); __ BIND(L_bad); __ stop(error_message); __ BIND(L_ok); @@ -671,9 +648,7 @@ #ifdef ASSERT { Label L_ok; - __ cmp(arg_slots.as_register(), 0); - __ br(Assembler::greaterEqual, false, Assembler::pt, L_ok); - __ delayed()->nop(); + __ cmp_and_br_short(arg_slots.as_register(), 0, Assembler::greaterEqual, Assembler::pt, L_ok); __ stop("negative arg_slots"); __ bind(L_ok); } @@ -748,9 +723,7 @@ __ ld_ptr( Address(temp_reg, 0 ), temp2_reg); __ st_ptr(temp2_reg, Address(temp_reg, offset) ); __ add(temp_reg, wordSize, temp_reg); - __ cmp(temp_reg, argslot_reg); - __ brx(Assembler::lessUnsigned, false, Assembler::pt, loop); - __ delayed()->nop(); // FILLME + __ cmp_and_brx_short(temp_reg, argslot_reg, Assembler::lessUnsigned, Assembler::pt, loop); } // Now move the argslot down, to point to the opened-up space. 
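Many call sites in these hunks replace the pair `ba(false, L); delayed()->nop();` with a single `ba_short(L)`. One plausible implementation, assuming use_cbcond() reports whether L is bound and within cbcond reach (this body is an editorial reconstruction, not quoted from the changeset):

void MacroAssembler::ba_short(Label& L) {
  assert_not_delayed();
  if (use_cbcond(L)) {
    // cbcond "equal, G0, G0" always takes the branch: an unconditional
    // short branch with no delay slot to fill.
    Assembler::cbcond(equal, icc, G0, G0, L);
  } else {
    br(always, false, pt, L);   // classic V9 branch-always
    delayed()->nop();           // with a nop in its delay slot
  }
}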
@@ -797,9 +770,7 @@ __ ld_ptr( Address(temp_reg, 0 ), temp2_reg); __ st_ptr(temp2_reg, Address(temp_reg, offset) ); __ sub(temp_reg, wordSize, temp_reg); - __ cmp(temp_reg, Gargs); - __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, L_loop); - __ delayed()->nop(); // FILLME + __ cmp_and_brx_short(temp_reg, Gargs, Assembler::greaterEqualUnsigned, Assembler::pt, L_loop); } // And adjust the argslot address to point at the deletion point. @@ -848,8 +819,7 @@ __ delayed()->nop(); __ ld_ptr( Address(argslot_reg, 0), temp_reg); __ st_ptr(temp_reg, Address(Gargs, 0)); - __ ba(false, L_break); - __ delayed()->nop(); // FILLME + __ ba_short(L_break); __ BIND(L_plural); // Loop for 2 or more: @@ -863,9 +833,7 @@ __ sub(Gargs, wordSize, Gargs ); __ ld_ptr( Address(top_reg, 0), temp2_reg); __ st_ptr(temp2_reg, Address(Gargs, 0)); - __ cmp(top_reg, argslot_reg); - __ brx(Assembler::greaterUnsigned, false, Assembler::pt, L_loop); - __ delayed()->nop(); // FILLME + __ cmp_and_brx_short(top_reg, argslot_reg, Assembler::greaterUnsigned, Assembler::pt, L_loop); __ BIND(L_break); } BLOCK_COMMENT("} push_arg_slots"); @@ -897,17 +865,13 @@ __ br(Assembler::lessEqual, false, Assembler::pn, L_bad); __ delayed()->nop(); } - __ cmp(bottom_reg, top_reg); - __ brx(Assembler::lessUnsigned, false, Assembler::pt, L_ok); - __ delayed()->nop(); + __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::lessUnsigned, Assembler::pt, L_ok); __ BIND(L_bad); __ stop("valid bounds (copy up)"); __ BIND(L_ok); } #endif - __ cmp(bottom_reg, top_reg); - __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pn, L_break); - __ delayed()->nop(); + __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::greaterEqualUnsigned, Assembler::pn, L_break); // work top down to bottom, copying contiguous data upwards // In pseudo-code: // while (--top >= bottom) *(top + distance) = *(top + 0); @@ -916,9 +880,7 @@ __ sub(top_reg, wordSize, top_reg); __ ld_ptr( Address(top_reg, 0 ), temp2_reg); __ st_ptr(temp2_reg, Address(top_reg, offset) ); - __ cmp(top_reg, bottom_reg); - __ brx(Assembler::greaterUnsigned, false, Assembler::pt, L_loop); - __ delayed()->nop(); // FILLME + __ cmp_and_brx_short(top_reg, bottom_reg, Assembler::greaterUnsigned, Assembler::pt, L_loop); assert(Interpreter::stackElementSize == wordSize, "else change loop"); __ BIND(L_break); BLOCK_COMMENT("} move_arg_slots_up"); @@ -951,17 +913,13 @@ __ br(Assembler::greaterEqual, false, Assembler::pn, L_bad); __ delayed()->nop(); } - __ cmp(bottom_reg, top_reg); - __ brx(Assembler::lessUnsigned, false, Assembler::pt, L_ok); - __ delayed()->nop(); + __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::lessUnsigned, Assembler::pt, L_ok); __ BIND(L_bad); __ stop("valid bounds (copy down)"); __ BIND(L_ok); } #endif - __ cmp(bottom_reg, top_reg); - __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pn, L_break); - __ delayed()->nop(); + __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::greaterEqualUnsigned, Assembler::pn, L_break); // work bottom up to top, copying contiguous data downwards // In pseudo-code: // while (bottom < top) *(bottom - distance) = *(bottom + 0), bottom++; @@ -970,9 +928,7 @@ __ ld_ptr( Address(bottom_reg, 0 ), temp2_reg); __ st_ptr(temp2_reg, Address(bottom_reg, offset) ); __ add(bottom_reg, wordSize, bottom_reg); - __ cmp(bottom_reg, top_reg); - __ brx(Assembler::lessUnsigned, false, Assembler::pt, L_loop); - __ delayed()->nop(); // FILLME + __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::lessUnsigned, Assembler::pt, L_loop); 
assert(Interpreter::stackElementSize == wordSize, "else change loop"); __ BIND(L_break); BLOCK_COMMENT("} move_arg_slots_down"); @@ -1329,9 +1285,7 @@ Label L_done; __ ld_ptr(vmarg, O2_scratch); - __ tst(O2_scratch); - __ brx(Assembler::zero, false, Assembler::pn, L_done); // No cast if null. - __ delayed()->nop(); + __ br_null_short(O2_scratch, Assembler::pn, L_done); // No cast if null. __ load_klass(O2_scratch, O2_scratch); // Live at this point: @@ -1436,8 +1390,7 @@ // this path is taken for int->byte, int->short __ sra(O1_scratch, G5_vminfo, O1_scratch); - __ ba(false, done); - __ delayed()->nop(); + __ ba_short(done); __ bind(zero_extend); // this is taken for int->char @@ -1860,9 +1813,7 @@ BLOCK_COMMENT("verify collect_count_constant {"); __ load_method_handle_vmslots(O3_scratch, G3_method_handle, O2_scratch); Label L_count_ok; - __ cmp(O3_scratch, collect_count_constant); - __ br(Assembler::equal, false, Assembler::pt, L_count_ok); - __ delayed()->nop(); + __ cmp_and_br_short(O3_scratch, collect_count_constant, Assembler::equal, Assembler::pt, L_count_ok); __ stop("bad vminfo in AMH.conv"); __ BIND(L_count_ok); BLOCK_COMMENT("} verify collect_count_constant"); @@ -1909,9 +1860,7 @@ BLOCK_COMMENT("verify dest_slot_constant {"); extract_conversion_vminfo(_masm, RicochetFrame::L5_conversion, O3_scratch); Label L_vminfo_ok; - __ cmp(O3_scratch, dest_slot_constant); - __ br(Assembler::equal, false, Assembler::pt, L_vminfo_ok); - __ delayed()->nop(); + __ cmp_and_br_short(O3_scratch, dest_slot_constant, Assembler::equal, Assembler::pt, L_vminfo_ok); __ stop("bad vminfo in AMH.conv"); __ BIND(L_vminfo_ok); BLOCK_COMMENT("} verify dest_slot_constant"); @@ -1951,14 +1900,10 @@ // If there are variable parameters, use dynamic checks to skip around the whole mess. 
Label L_done; if (keep3_count.is_register()) { - __ tst(keep3_count.as_register()); - __ br(Assembler::zero, false, Assembler::pn, L_done); - __ delayed()->nop(); + __ cmp_and_br_short(keep3_count.as_register(), 0, Assembler::equal, Assembler::pn, L_done); } if (close_count.is_register()) { - __ cmp(close_count.as_register(), open_count); - __ br(Assembler::equal, false, Assembler::pn, L_done); - __ delayed()->nop(); + __ cmp_and_br_short(close_count.as_register(), open_count, Assembler::equal, Assembler::pn, L_done); } if (move_keep3 && fix_arg_base) { @@ -1999,8 +1944,7 @@ } if (emit_guard) { - __ ba(false, L_done); // assumes emit_move_up is true also - __ delayed()->nop(); + __ ba_short(L_done); // assumes emit_move_up is true also __ BIND(L_move_up); } @@ -2133,8 +2077,7 @@ #ifdef ASSERT { Label L_ok; - __ br_notnull(O7_temp, false, Assembler::pt, L_ok); - __ delayed()->nop(); + __ br_notnull_short(O7_temp, Assembler::pt, L_ok); __ stop("bad method handle return"); __ BIND(L_ok); } @@ -2192,11 +2135,10 @@ Label L_skip; if (length_constant < 0) { load_conversion_vminfo(_masm, G3_amh_conversion, O3_scratch); - __ br_zero(Assembler::notZero, false, Assembler::pn, O3_scratch, L_skip); - __ delayed()->nop(); + __ cmp_zero_and_br(Assembler::notZero, O3_scratch, L_skip); + __ delayed()->nop(); // to avoid back-to-back cbcond instructions } - __ br_null(O1_array, false, Assembler::pn, L_array_is_empty); - __ delayed()->nop(); + __ br_null_short(O1_array, Assembler::pn, L_array_is_empty); __ BIND(L_skip); } __ null_check(O1_array, oopDesc::klass_offset_in_bytes()); @@ -2210,8 +2152,7 @@ Label L_ok_array_klass, L_bad_array_klass, L_bad_array_length; __ check_klass_subtype(O2_array_klass, O3_klass, O4_scratch, G5_scratch, L_ok_array_klass); // If we get here, the type check failed! - __ ba(false, L_bad_array_klass); - __ delayed()->nop(); + __ ba_short(L_bad_array_klass); __ BIND(L_ok_array_klass); // Check length. @@ -2247,8 +2188,7 @@ __ BIND(L_array_is_empty); remove_arg_slots(_masm, -stack_move_unit() * array_slots, O0_argslot, O1_scratch, O2_scratch, O3_scratch); - __ ba(false, L_args_done); // no spreading to do - __ delayed()->nop(); + __ ba_short(L_args_done); // no spreading to do __ BIND(L_insert_arg_space); // come here in the usual case, stack_move < 0 (2 or more spread arguments) // Live: O1_array, O2_argslot_limit, O3_stack_move @@ -2289,9 +2229,7 @@ Address(O1_source, 0), Address(O4_fill_ptr, 0), O2_scratch); // must be an even register for !_LP64 long moves (uses O2/O3) __ add(O1_source, type2aelembytes(elem_type), O1_source); - __ cmp(O4_fill_ptr, O0_argslot); - __ brx(Assembler::greaterUnsigned, false, Assembler::pt, L_loop); - __ delayed()->nop(); // FILLME + __ cmp_and_brx_short(O4_fill_ptr, O0_argslot, Assembler::greaterUnsigned, Assembler::pt, L_loop); } else if (length_constant == 0) { // nothing to copy } else { diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/sharedRuntime_sparc.cpp --- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -600,7 +600,7 @@ void AdapterGenerator::patch_callers_callsite() { Label L; __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch); - __ br_null(G3_scratch, false, __ pt, L); + __ br_null(G3_scratch, false, Assembler::pt, L); // Schedule the branch target address early. 
__ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch); // Call into the VM to patch the caller, then jump to compiled callee @@ -1127,8 +1127,7 @@ Label loop; __ bind(loop); __ sub(L0, 1, L0); - __ br_null(L0, false, Assembler::pt, loop); - __ delayed()->nop(); + __ br_null_short(L0, Assembler::pt, loop); __ restore(); } @@ -1202,7 +1201,7 @@ // the call site corrected. __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch); __ bind(ok2); - __ br_null(G3_scratch, false, __ pt, skip_fixup); + __ br_null(G3_scratch, false, Assembler::pt, skip_fixup); __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch); __ jump_to(ic_miss, G3_scratch); __ delayed()->nop(); @@ -1779,9 +1778,7 @@ AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub()); __ verify_oop(O0); __ load_klass(O0, temp_reg); - __ cmp(temp_reg, G5_inline_cache_reg); - __ brx(Assembler::equal, true, Assembler::pt, L); - __ delayed()->nop(); + __ cmp_and_brx_short(temp_reg, G5_inline_cache_reg, Assembler::equal, Assembler::pt, L); __ jump_to(ic_miss, temp_reg); __ delayed()->nop(); @@ -2182,8 +2179,7 @@ #ifdef ASSERT { Label L; __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0); - __ br_null(O0, false, Assembler::pt, L); - __ delayed()->nop(); + __ br_null_short(O0, Assembler::pt, L); __ stop("no pending exception allowed on exit from IR::monitorenter"); __ bind(L); } @@ -2298,9 +2294,7 @@ Address suspend_state(G2_thread, JavaThread::suspend_flags_offset()); __ br(Assembler::notEqual, false, Assembler::pn, L); __ delayed()->ld(suspend_state, G3_scratch); - __ cmp(G3_scratch, 0); - __ br(Assembler::equal, false, Assembler::pt, no_block); - __ delayed()->nop(); + __ cmp_and_br_short(G3_scratch, 0, Assembler::equal, Assembler::pt, no_block); __ bind(L); // Block. Save any potential method result value before the operation and @@ -2328,9 +2322,7 @@ Label no_reguard; __ ld(G2_thread, JavaThread::stack_guard_state_offset(), G3_scratch); - __ cmp(G3_scratch, JavaThread::stack_guard_yellow_disabled); - __ br(Assembler::notEqual, false, Assembler::pt, no_reguard); - __ delayed()->nop(); + __ cmp_and_br_short(G3_scratch, JavaThread::stack_guard_yellow_disabled, Assembler::notEqual, Assembler::pt, no_reguard); save_native_result(masm, ret_type, stack_slots); __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); @@ -2382,8 +2374,7 @@ #ifdef ASSERT { Label L; __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0); - __ br_null(O0, false, Assembler::pt, L); - __ delayed()->nop(); + __ br_null_short(O0, Assembler::pt, L); __ stop("no pending exception allowed on exit from IR::monitorexit"); __ bind(L); } @@ -2639,9 +2630,7 @@ AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub()); __ verify_oop(O0); __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); - __ cmp(temp_reg, G5_inline_cache_reg); - __ brx(Assembler::equal, true, Assembler::pt, L); - __ delayed()->nop(); + __ cmp_and_brx_short(temp_reg, G5_inline_cache_reg, Assembler::equal, Assembler::pt, L); __ jump_to(ic_miss, temp_reg); __ delayed()->nop(); @@ -3143,8 +3132,7 @@ gen_new_frame(masm, deopt); // allocate an interpreter frame - __ tst(O4array_size); - __ br(Assembler::notZero, false, Assembler::pn, loop); + __ cmp_zero_and_br(Assembler::notZero, O4array_size, loop); __ delayed()->add(O3array, wordSize, O3array); __ ld_ptr(G3pcs, 0, O7); // load final frame new pc @@ -3221,7 +3209,7 @@ // pc is now in O7. 
Return values are still in the expected places map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); - __ ba(false, cont); + __ ba(cont); __ delayed()->mov(Deoptimization::Unpack_deopt, L0deopt_mode); int exception_offset = __ offset() - start; @@ -3256,8 +3244,7 @@ // verify that there is really an exception oop in exception_oop Label has_exception; __ ld_ptr(G2_thread, JavaThread::exception_oop_offset(), Oexception); - __ br_notnull(Oexception, false, Assembler::pt, has_exception); - __ delayed()-> nop(); + __ br_notnull_short(Oexception, Assembler::pt, has_exception); __ stop("no exception in thread"); __ bind(has_exception); @@ -3265,14 +3252,13 @@ Label no_pending_exception; Address exception_addr(G2_thread, Thread::pending_exception_offset()); __ ld_ptr(exception_addr, Oexception); - __ br_null(Oexception, false, Assembler::pt, no_pending_exception); - __ delayed()->nop(); + __ br_null_short(Oexception, Assembler::pt, no_pending_exception); __ stop("must not have pending exception here"); __ bind(no_pending_exception); } #endif - __ ba(false, cont); + __ ba(cont); __ delayed()->mov(Deoptimization::Unpack_exception, L0deopt_mode);; // @@ -3313,9 +3299,7 @@ RegisterSaver::restore_result_registers(masm); Label noException; - __ cmp(G4deopt_mode, Deoptimization::Unpack_exception); // Was exception pending? - __ br(Assembler::notEqual, false, Assembler::pt, noException); - __ delayed()->nop(); + __ cmp_and_br_short(G4deopt_mode, Deoptimization::Unpack_exception, Assembler::notEqual, Assembler::pt, noException); // Move the pending exception from exception_oop to Oexception so // the pending exception will be picked up the interpreter. @@ -3359,9 +3343,7 @@ // In 32 bit, C2 returns longs in G1 so restore the saved G1 into // I0/I1 if the return value is long. Label not_long; - __ cmp(O0,T_LONG); - __ br(Assembler::notEqual, false, Assembler::pt, not_long); - __ delayed()->nop(); + __ cmp_and_br_short(O0,T_LONG, Assembler::notEqual, Assembler::pt, not_long); __ ldd(saved_Greturn1_addr,I0); __ bind(not_long); #endif @@ -3534,9 +3516,7 @@ Label pending; __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1); - __ tst(O1); - __ brx(Assembler::notEqual, true, Assembler::pn, pending); - __ delayed()->nop(); + __ br_notnull_short(O1, Assembler::pn, pending); RegisterSaver::restore_live_registers(masm); @@ -3623,9 +3603,7 @@ Label pending; __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1); - __ tst(O1); - __ brx(Assembler::notEqual, true, Assembler::pn, pending); - __ delayed()->nop(); + __ br_notnull_short(O1, Assembler::pn, pending); // get the returned methodOop diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/sparc.ad --- a/src/cpu/sparc/vm/sparc.ad Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/sparc.ad Thu Jul 21 11:25:07 2011 -0700 @@ -1693,7 +1693,6 @@ void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { MacroAssembler _masm(&cbuf); - Label L; Register G5_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode()); Register temp_reg = G3; assert( G5_ic_reg != temp_reg, "conflicting registers" ); @@ -2315,60 +2314,23 @@ __ delayed()->nop(); %} - enc_class enc_bp( Label labl, cmpOp cmp, flagsReg cc ) %{ - MacroAssembler _masm(&cbuf); - Label &L = *($labl$$label); - Assembler::Predict predict_taken = - cbuf.is_backward_branch(L) ? 
Assembler::pt : Assembler::pn; - - __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, L); - __ delayed()->nop(); - %} - - enc_class enc_bpl( Label labl, cmpOp cmp, flagsRegL cc ) %{ + enc_class enc_bp( label labl, cmpOp cmp, flagsReg cc ) %{ MacroAssembler _masm(&cbuf); - Label &L = *($labl$$label); - Assembler::Predict predict_taken = - cbuf.is_backward_branch(L) ? Assembler::pt : Assembler::pn; - - __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, L); - __ delayed()->nop(); - %} - - enc_class enc_bpx( Label labl, cmpOp cmp, flagsRegP cc ) %{ - MacroAssembler _masm(&cbuf); - Label &L = *($labl$$label); + Label* L = $labl$$label; Assembler::Predict predict_taken = - cbuf.is_backward_branch(L) ? Assembler::pt : Assembler::pn; - - __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, L); - __ delayed()->nop(); - %} - - enc_class enc_fbp( Label labl, cmpOpF cmp, flagsRegF cc ) %{ - MacroAssembler _masm(&cbuf); - Label &L = *($labl$$label); - Assembler::Predict predict_taken = - cbuf.is_backward_branch(L) ? Assembler::pt : Assembler::pn; - - __ fbp( (Assembler::Condition)($cmp$$cmpcode), false, (Assembler::CC)($cc$$reg), predict_taken, L); + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + + __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L); __ delayed()->nop(); %} - enc_class enc_ba( Label labl ) %{ + enc_class enc_bpr( label labl, cmpOp_reg cmp, iRegI op1 ) %{ MacroAssembler _masm(&cbuf); - Label &L = *($labl$$label); - __ ba(false, L); - __ delayed()->nop(); - %} - - enc_class enc_bpr( Label labl, cmpOp_reg cmp, iRegI op1 ) %{ - MacroAssembler _masm(&cbuf); - Label &L = *$labl$$label; + Label* L = $labl$$label; Assembler::Predict predict_taken = - cbuf.is_backward_branch(L) ? Assembler::pt : Assembler::pn; - - __ bpr( (Assembler::RCondition)($cmp$$cmpcode), false, predict_taken, as_Register($op1$$reg), L); + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + + __ bpr( (Assembler::RCondition)($cmp$$cmpcode), false, predict_taken, as_Register($op1$$reg), *L); __ delayed()->nop(); %} @@ -2986,7 +2948,7 @@ __ brx(Assembler::equal, true, Assembler::pn, Ldone); __ delayed()->add(G0, 1, result_reg); - __ br_on_reg_cond(Assembler::rc_z, true, Assembler::pn, cnt_reg, Ldone); + __ cmp_zero_and_br(Assembler::zero, cnt_reg, Ldone, true, Assembler::pn); __ delayed()->add(G0, 1, result_reg); // count == 0 //rename registers @@ -3006,7 +2968,7 @@ // Compare char[] arrays aligned to 4 bytes. 
__ char_arrays_equals(str1_reg, str2_reg, limit_reg, result_reg, chr1_reg, chr2_reg, Ldone); - __ ba(false,Ldone); + __ ba(Ldone); __ delayed()->add(G0, 1, result_reg); // char by char compare @@ -3065,7 +3027,7 @@ __ br(Assembler::notEqual, true, Assembler::pn, Ldone); __ delayed()->mov(G0, result_reg); // not equal - __ br_on_reg_cond(Assembler::rc_z, true, Assembler::pn, tmp1_reg, Ldone); + __ cmp_zero_and_br(Assembler::zero, tmp1_reg, Ldone, true, Assembler::pn); __ delayed()->add(G0, 1, result_reg); // zero-length arrays are equal // load array addresses @@ -9232,9 +9194,11 @@ size(8); ins_cost(BRANCH_COST); format %{ "BA $labl" %} - // Prim = bits 24-22, Secnd = bits 31-30, Tert = cond - opcode(Assembler::br_op2, Assembler::branch_op, Assembler::always); - ins_encode( enc_ba( labl ) ); + ins_encode %{ + Label* L = $labl$$label; + __ ba(*L); + __ delayed()->nop(); + %} ins_pc_relative(1); ins_pipe(br); %} @@ -9314,8 +9278,14 @@ size(8); ins_cost(BRANCH_COST); format %{ "BP$cmp $pcc,$labl" %} - // Prim = bits 24-22, Secnd = bits 31-30 - ins_encode( enc_bpx( labl, cmp, pcc ) ); + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + + __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, *L); + __ delayed()->nop(); + %} ins_pc_relative(1); ins_pipe(br_cc); %} @@ -9327,8 +9297,14 @@ size(8); ins_cost(BRANCH_COST); format %{ "FBP$cmp $fcc,$labl" %} - // Prim = bits 24-22, Secnd = bits 31-30 - ins_encode( enc_fbp( labl, cmp, fcc ) ); + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + + __ fbp( (Assembler::Condition)($cmp$$cmpcode), false, (Assembler::CC)($fcc$$reg), predict_taken, *L); + __ delayed()->nop(); + %} ins_pc_relative(1); ins_pipe(br_fcc); %} @@ -9387,8 +9363,14 @@ size(8); ins_cost(BRANCH_COST); format %{ "BP$cmp $xcc,$labl" %} - // Prim = bits 24-22, Secnd = bits 31-30 - ins_encode( enc_bpl( labl, cmp, xcc ) ); + ins_encode %{ + Label* L = $labl$$label; + Assembler::Predict predict_taken = + cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn; + + __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L); + __ delayed()->nop(); + %} ins_pc_relative(1); ins_pipe(br_cc); %} @@ -9707,7 +9689,6 @@ effect(KILL scratch, TEMP scratch2); ins_cost(100); - size(4*112); // conservative overestimation ... format %{ "FASTLOCK $object, $box; KILL $scratch, $scratch2, $box" %} ins_encode( Fast_Lock(object, box, scratch, scratch2) ); ins_pipe(long_memory_op); @@ -9719,7 +9700,6 @@ effect(KILL scratch, TEMP scratch2); ins_cost(100); - size(4*120); // conservative overestimation ... 
format %{ "FASTUNLOCK $object, $box; KILL $scratch, $scratch2, $box" %} ins_encode( Fast_Unlock(object, box, scratch, scratch2) ); ins_pipe(long_memory_op); diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/stubGenerator_sparc.cpp --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -150,8 +150,7 @@ { const Register t = G3_scratch; Label L; __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t); - __ br_null(t, false, Assembler::pt, L); - __ delayed()->nop(); + __ br_null_short(t, Assembler::pt, L); __ stop("StubRoutines::call_stub: entered with pending exception"); __ bind(L); } @@ -207,8 +206,7 @@ Label exit; __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter __ add( FP, STACK_BIAS, dst ); - __ tst(cnt); - __ br(Assembler::zero, false, Assembler::pn, exit); + __ cmp_zero_and_br(Assembler::zero, cnt, exit); __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args // copy parameters if any @@ -282,20 +280,20 @@ __ delayed()->restore(); __ BIND(is_object); - __ ba(false, exit); + __ ba(exit); __ delayed()->st_ptr(O0, addr, G0); __ BIND(is_float); - __ ba(false, exit); + __ ba(exit); __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0); __ BIND(is_double); - __ ba(false, exit); + __ ba(exit); __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0); __ BIND(is_long); #ifdef _LP64 - __ ba(false, exit); + __ ba(exit); __ delayed()->st_long(O0, addr, G0); // store entire long #else #if defined(COMPILER2) @@ -307,11 +305,11 @@ // do this here. Unfortunately if we did a rethrow we'd see an machepilog node // first which would move g1 -> O0/O1 and destroy the exception we were throwing. - __ ba(false, exit); + __ ba(exit); __ delayed()->stx(G1, addr, G0); // store entire long #else __ st(O1, addr, BytesPerInt); - __ ba(false, exit); + __ ba(exit); __ delayed()->st(O0, addr, G0); #endif /* COMPILER2 */ #endif /* _LP64 */ @@ -382,8 +380,7 @@ // make sure that this code is only executed if there is a pending exception { Label L; __ ld_ptr(exception_addr, Gtemp); - __ br_notnull(Gtemp, false, Assembler::pt, L); - __ delayed()->nop(); + __ br_notnull_short(Gtemp, Assembler::pt, L); __ stop("StubRoutines::forward exception: no pending exception (1)"); __ bind(L); } @@ -406,8 +403,7 @@ #ifdef ASSERT // make sure exception is set { Label L; - __ br_notnull(Oexception, false, Assembler::pt, L); - __ delayed()->nop(); + __ br_notnull_short(Oexception, Assembler::pt, L); __ stop("StubRoutines::forward exception: no pending exception (2)"); __ bind(L); } @@ -501,8 +497,7 @@ Address exception_addr(G2_thread, Thread::pending_exception_offset()); Register scratch_reg = Gtemp; __ ld_ptr(exception_addr, scratch_reg); - __ br_notnull(scratch_reg, false, Assembler::pt, L); - __ delayed()->nop(); + __ br_notnull_short(scratch_reg, Assembler::pt, L); __ should_not_reach_here(); __ bind(L); #endif // ASSERT @@ -614,9 +609,7 @@ __ mov(G0,yield_reg); __ BIND(retry); - __ cmp(yield_reg, V8AtomicOperationUnderLockSpinCount); - __ br(Assembler::less, false, Assembler::pt, dontyield); - __ delayed()->nop(); + __ cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dontyield); // This code can only be called from inside the VM, this // stub is only invoked from Atomic::add(). 
We do not @@ -676,9 +669,7 @@ // try to replace O2 with O3 __ cas_under_lock(O1, O2, O3, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false); - __ cmp(O2, O3); - __ br(Assembler::notEqual, false, Assembler::pn, retry); - __ delayed()->nop(); + __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry); __ retl(false); __ delayed()->mov(O2, O0); // report previous value to caller @@ -798,11 +789,9 @@ __ BIND(retry); __ lduw(O1, 0, O2); - __ add(O0, O2, O3); - __ cas(O1, O2, O3); - __ cmp( O2, O3); - __ br(Assembler::notEqual, false, Assembler::pn, retry); - __ delayed()->nop(); + __ add(O0, O2, O3); + __ cas(O1, O2, O3); + __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry); __ retl(false); __ delayed()->add(O0, O2, O0); // note that cas made O2==O3 } else { @@ -1370,8 +1359,7 @@ // copy tailing bytes __ BIND(L_copy_byte); - __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); - __ delayed()->nop(); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); __ align(OptoLoopAlignment); __ BIND(L_copy_byte_loop); __ ldub(from, offset, O3); @@ -1482,8 +1470,7 @@ // copy 1 element (2 bytes) at a time __ BIND(L_copy_byte); - __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); - __ delayed()->nop(); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); __ align(OptoLoopAlignment); __ BIND(L_copy_byte_loop); __ dec(end_from); @@ -1600,8 +1587,7 @@ // copy 1 element at a time __ BIND(L_copy_2_bytes); - __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); - __ delayed()->nop(); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); __ align(OptoLoopAlignment); __ BIND(L_copy_2_bytes_loop); __ lduh(from, offset, O3); @@ -1946,8 +1932,7 @@ // copy 1 element (2 bytes) at a time __ BIND(L_copy_2_bytes); - __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); - __ delayed()->nop(); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); __ BIND(L_copy_2_bytes_loop); __ dec(end_from, 2); __ dec(end_to, 2); @@ -2060,8 +2045,7 @@ // copy 1 element at a time __ BIND(L_copy_4_bytes); - __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); - __ delayed()->nop(); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); __ BIND(L_copy_4_bytes_loop); __ ld(from, offset, O3); __ deccc(count); @@ -2193,8 +2177,7 @@ // copy 1 element (4 bytes) at a time __ BIND(L_copy_4_bytes); - __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit); - __ delayed()->nop(); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); __ BIND(L_copy_4_bytes_loop); __ dec(end_from, 4); __ dec(end_to, 4); @@ -2576,7 +2559,7 @@ super_klass->after_save(), L0, L1, L2, L4, NULL, &L_pop_to_miss); - __ ba(false, L_success); + __ ba(L_success); __ delayed()->restore(); __ bind(L_pop_to_miss); @@ -2673,8 +2656,7 @@ // ======== loop entry is here ======== __ BIND(load_element); __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop - __ br_null(G3_oop, true, Assembler::pt, store_element); - __ delayed()->nop(); + __ br_null_short(G3_oop, Assembler::pt, store_element); __ load_klass(G3_oop, G4_klass); // query the object klass @@ -2896,8 +2878,7 @@ // assert(src->klass() != NULL); BLOCK_COMMENT("assert klasses not null"); { Label L_a, L_b; - __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL - __ delayed()->nop(); + __ br_notnull_short(G3_src_klass, Assembler::pt, L_b); // it 
is broken if klass is NULL __ bind(L_a); __ stop("broken null klass"); __ bind(L_b); @@ -2937,9 +2918,7 @@ } // if (src->klass() != dst->klass()) return -1; - __ cmp(G3_src_klass, G4_dst_klass); - __ brx(Assembler::notEqual, false, Assembler::pn, L_failed); - __ delayed()->nop(); + __ cmp_and_brx_short(G3_src_klass, G4_dst_klass, Assembler::notEqual, Assembler::pn, L_failed); // if (!src->is_Array()) return -1; __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0 @@ -3007,9 +2986,7 @@ __ delayed()->signx(length, count); // length #ifdef ASSERT { Label L; - __ cmp(G3_elsize, LogBytesPerLong); - __ br(Assembler::equal, false, Assembler::pt, L); - __ delayed()->nop(); + __ cmp_and_br_short(G3_elsize, LogBytesPerLong, Assembler::equal, Assembler::pt, L); __ stop("must be long copy, but elsize is wrong"); __ bind(L); } diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/templateInterpreter_sparc.cpp --- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -190,9 +190,7 @@ const Register size = G1_scratch; if (EnableInvokeDynamic) { __ ldub(Address(Lbcp, 0), G1_scratch); // Load current bytecode. - __ cmp(G1_scratch, Bytecodes::_invokedynamic); - __ br(Assembler::equal, false, Assembler::pn, L_giant_index); - __ delayed()->nop(); + __ cmp_and_br_short(G1_scratch, Bytecodes::_invokedynamic, Assembler::equal, Assembler::pn, L_giant_index); } __ get_cache_and_index_at_bcp(cache, G1_scratch, 1); __ bind(L_got_cache); @@ -207,8 +205,7 @@ if (EnableInvokeDynamic) { __ bind(L_giant_index); __ get_cache_and_index_at_bcp(cache, G1_scratch, 1, sizeof(u4)); - __ ba(false, L_got_cache); - __ delayed()->nop(); + __ ba_short(L_got_cache); } return entry; @@ -221,9 +218,7 @@ { Label L; Address exception_addr(G2_thread, Thread::pending_exception_offset()); __ ld_ptr(exception_addr, Gtemp); // Load pending exception. - __ tst(Gtemp); - __ brx(Assembler::equal, false, Assembler::pt, L); - __ delayed()->nop(); + __ br_null_short(Gtemp, Assembler::pt, L); __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); __ should_not_reach_here(); __ bind(L); @@ -304,8 +299,7 @@ if (ProfileInterpreter) { // If no method data exists, go to profile_continue. 
__ ld_ptr(Lmethod, methodOopDesc::method_data_offset(), G4_scratch); - __ br_null(G4_scratch, false, Assembler::pn, no_mdo); - __ delayed()->nop(); + __ br_null_short(G4_scratch, Assembler::pn, no_mdo); // Increment counter Address mdo_invocation_counter(G4_scratch, in_bytes(methodDataOopDesc::invocation_counter_offset()) + @@ -313,8 +307,7 @@ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, G3_scratch, Lscratch, Assembler::zero, overflow); - __ ba(false, done); - __ delayed()->nop(); + __ ba_short(done); } // Increment counter in methodOop @@ -340,9 +333,7 @@ // Test to see if we should create a method data oop AddressLiteral profile_limit((address)&InvocationCounter::InterpreterProfileLimit); __ load_contents(profile_limit, G3_scratch); - __ cmp(O0, G3_scratch); - __ br(Assembler::lessUnsigned, false, Assembler::pn, *profile_method_continue); - __ delayed()->nop(); + __ cmp_and_br_short(O0, G3_scratch, Assembler::lessUnsigned, Assembler::pn, *profile_method_continue); // if no method data exists, go to profile_method __ test_method_data_pointer(*profile_method); @@ -351,7 +342,7 @@ AddressLiteral invocation_limit((address)&InvocationCounter::InterpreterInvocationLimit); __ load_contents(invocation_limit, G3_scratch); __ cmp(O0, G3_scratch); - __ br(Assembler::greaterEqualUnsigned, false, Assembler::pn, *overflow); + __ br(Assembler::greaterEqualUnsigned, false, Assembler::pn, *overflow); // Far distance __ delayed()->nop(); } @@ -410,19 +401,14 @@ assert_different_registers(Rframe_size, Rscratch, Rscratch2); - __ set( page_size, Rscratch ); - __ cmp( Rframe_size, Rscratch ); - - __ br( Assembler::lessEqual, false, Assembler::pt, after_frame_check ); - __ delayed()->nop(); + __ set(page_size, Rscratch); + __ cmp_and_br_short(Rframe_size, Rscratch, Assembler::lessEqual, Assembler::pt, after_frame_check); // get the stack base, and in debug, verify it is non-zero __ ld_ptr( G2_thread, Thread::stack_base_offset(), Rscratch ); #ifdef ASSERT Label base_not_zero; - __ cmp( Rscratch, G0 ); - __ brx( Assembler::notEqual, false, Assembler::pn, base_not_zero ); - __ delayed()->nop(); + __ br_notnull_short(Rscratch, Assembler::pn, base_not_zero); __ stop("stack base is zero in generate_stack_overflow_check"); __ bind(base_not_zero); #endif @@ -432,9 +418,7 @@ __ ld_ptr( G2_thread, Thread::stack_size_offset(), Rscratch2 ); #ifdef ASSERT Label size_not_zero; - __ cmp( Rscratch2, G0 ); - __ brx( Assembler::notEqual, false, Assembler::pn, size_not_zero ); - __ delayed()->nop(); + __ br_notnull_short(Rscratch2, Assembler::pn, size_not_zero); __ stop("stack size is zero in generate_stack_overflow_check"); __ bind(size_not_zero); #endif @@ -450,9 +434,7 @@ // the frame is greater than one page in size, so check against // the bottom of the stack - __ cmp( SP, Rscratch ); - __ brx( Assembler::greater, false, Assembler::pt, after_frame_check ); - __ delayed()->nop(); + __ cmp_and_brx_short(SP, Rscratch, Assembler::greater, Assembler::pt, after_frame_check); // Save the return address as the exception pc __ st_ptr(O7, saved_exception_pc); @@ -624,9 +606,7 @@ // If we need a safepoint check, generate full interpreter entry. 
AddressLiteral sync_state(SafepointSynchronize::address_of_state()); __ set(sync_state, G3_scratch); - __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized); - __ br(Assembler::notEqual, false, Assembler::pn, slow_path); - __ delayed()->nop(); + __ cmp_and_br_short(G3_scratch, SafepointSynchronize::_not_synchronized, Assembler::notEqual, Assembler::pn, slow_path); // Code: _return __ retl(); @@ -664,14 +644,12 @@ AddressLiteral sync_state(SafepointSynchronize::address_of_state()); __ load_contents(sync_state, G3_scratch); __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized); - __ br(Assembler::notEqual, false, Assembler::pn, slow_path); - __ delayed()->nop(); + __ cmp_and_br_short(G3_scratch, SafepointSynchronize::_not_synchronized, Assembler::notEqual, Assembler::pn, slow_path); // Check if local 0 != NULL __ ld_ptr(Gargs, G0, Otos_i ); // get local 0 - __ tst(Otos_i); // check if local 0 == NULL and go the slow path - __ brx(Assembler::zero, false, Assembler::pn, slow_path); - __ delayed()->nop(); + // check if local 0 == NULL and go to the slow path + __ br_null_short(Otos_i, Assembler::pn, slow_path); // read first instruction word and extract bytecode @ 1 and index @ 2 @@ -697,9 +675,7 @@ __ ld_ptr(G3_scratch, cp_base_offset + ConstantPoolCacheEntry::indices_offset(), G1_scratch); __ srl(G1_scratch, 2*BitsPerByte, G1_scratch); __ and3(G1_scratch, 0xFF, G1_scratch); - __ cmp(G1_scratch, Bytecodes::_getfield); - __ br(Assembler::notEqual, false, Assembler::pn, slow_path); - __ delayed()->nop(); + __ cmp_and_br_short(G1_scratch, Bytecodes::_getfield, Assembler::notEqual, Assembler::pn, slow_path); // Get the type and return field offset from the constant pool cache __ ld_ptr(G3_scratch, cp_base_offset + ConstantPoolCacheEntry::flags_offset(), G1_scratch); @@ -787,9 +763,8 @@ // Check if local 0 != NULL // If the receiver is null then it is OK to jump to the slow path. __ ld_ptr(Gargs, G0, Otos_i ); // get local 0 - __ tst(Otos_i); // check if local 0 == NULL and go the slow path - __ brx(Assembler::zero, false, Assembler::pn, slow_path); - __ delayed()->nop(); + // check if local 0 == NULL and go to the slow path + __ cmp_and_brx_short(Otos_i, 0, Assembler::equal, Assembler::pn, slow_path); // Load the value of the referent field. 
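All of the interpreter conversions above trade on the same property: a cbcond needs no delay slot, but its wdisp10 target field holds only a 10-bit signed word displacement, so the short-branch macros have to fall back to the classic compare/branch/delayed-nop sequence whenever the target is out of range (or cbcond is unavailable). The following is a minimal standalone sketch of that reachability test, assuming the +/-512-instruction-word range implied by a 10-bit signed word displacement; it is not the changeset's actual use_cbcond(), whose body does not appear in these hunks, and the helper names and the bound-label restriction are assumptions of the sketch.

    #include <cstdint>

    // Sketch only: a cbcond displacement counts 32-bit instruction words
    // in a signed 10-bit field, i.e. -512..+511 words from the branch.
    static bool fits_wdisp10(std::intptr_t byte_displacement) {
      if (byte_displacement & 3) return false;  // must be word aligned
      std::intptr_t words = byte_displacement / 4;
      return words >= -512 && words <= 511;
    }

    // Hypothetical stand-in for a use_cbcond()-style test: emit cbcond only
    // when the CPU supports it and a bound target is known to be in range
    // (a conservative simplification; the real macro may also accept
    // not-yet-bound labels expected to land nearby).
    static bool can_use_cbcond_sketch(bool cpu_has_cbcond, bool label_bound,
                                      std::intptr_t branch_pc, std::intptr_t target_pc) {
      if (!cpu_has_cbcond || !label_bound) return false;
      return fits_wdisp10(target_pc - branch_pc);
    }

    int main() {
      // 100 words forward is reachable; 4000 words forward needs the long form.
      bool near_ok = can_use_cbcond_sketch(true, true, 0x1000, 0x1000 + 100 * 4);
      bool far_bad = !can_use_cbcond_sketch(true, true, 0x1000, 0x1000 + 4000 * 4);
      return (near_ok && far_bad) ? 0 : 1;
    }

Either way the call site stays correct: on a miss the macros simply keep the V9 compare-and-branch form, so nothing depends on cbcond being present.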
@@ -952,9 +927,7 @@ { Label L; Address signature_handler(Lmethod, methodOopDesc::signature_handler_offset()); __ ld_ptr(signature_handler, G3_scratch); - __ tst(G3_scratch); - __ brx(Assembler::notZero, false, Assembler::pt, L); - __ delayed()->nop(); + __ br_notnull_short(G3_scratch, Assembler::pt, L); __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), Lmethod); __ ld_ptr(signature_handler, G3_scratch); __ bind(L); @@ -1019,9 +992,7 @@ #ifdef ASSERT if (!PrintSignatureHandlers) // do not dirty the output with this { Label L; - __ tst(O1); - __ brx(Assembler::notZero, false, Assembler::pt, L); - __ delayed()->nop(); + __ br_notnull_short(O1, Assembler::pt, L); __ stop("mirror is missing"); __ bind(L); } @@ -1038,9 +1009,7 @@ #ifdef ASSERT { Label L; - __ tst(O0); - __ brx(Assembler::notZero, false, Assembler::pt, L); - __ delayed()->nop(); + __ br_notnull_short(O0, Assembler::pt, L); __ stop("native entry point is missing"); __ bind(L); } @@ -1079,9 +1048,7 @@ #ifdef ASSERT { Label L; __ ld(thread_state, G3_scratch); - __ cmp(G3_scratch, _thread_in_Java); - __ br(Assembler::equal, false, Assembler::pt, L); - __ delayed()->nop(); + __ cmp_and_br_short(G3_scratch, _thread_in_Java, Assembler::equal, Assembler::pt, L); __ stop("Wrong thread state in native stub"); __ bind(L); } @@ -1134,9 +1101,7 @@ Label L; __ br(Assembler::notEqual, false, Assembler::pn, L); __ delayed()->ld(G2_thread, JavaThread::suspend_flags_offset(), G3_scratch); - __ cmp(G3_scratch, 0); - __ br(Assembler::equal, false, Assembler::pt, no_block); - __ delayed()->nop(); + __ cmp_and_br_short(G3_scratch, 0, Assembler::equal, Assembler::pt, no_block); __ bind(L); // Block. Save any potential method result value before the operation and @@ -1185,9 +1150,7 @@ Label no_oop, store_result; __ set((intptr_t)AbstractInterpreter::result_handler(T_OBJECT), G3_scratch); - __ cmp(G3_scratch, Lscratch); - __ brx(Assembler::notEqual, false, Assembler::pt, no_oop); - __ delayed()->nop(); + __ cmp_and_brx_short(G3_scratch, Lscratch, Assembler::notEqual, Assembler::pt, no_oop); __ addcc(G0, O0, O0); __ brx(Assembler::notZero, true, Assembler::pt, store_result); // if result is not NULL: __ delayed()->ld_ptr(O0, 0, O0); // unbox it @@ -1206,9 +1169,7 @@ { Label L; Address exception_addr(G2_thread, Thread::pending_exception_offset()); __ ld_ptr(exception_addr, Gtemp); - __ tst(Gtemp); - __ brx(Assembler::equal, false, Assembler::pt, L); - __ delayed()->nop(); + __ br_null_short(Gtemp, Assembler::pt, L); // Note: This could be handled more efficiently since we know that the native // method doesn't have an exception handler. We could directly return // to the exception handler for the caller. @@ -1245,9 +1206,7 @@ #ifdef ASSERT { Label ok; - __ cmp(I5_savedSP, FP); - __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, ok); - __ delayed()->nop(); + __ cmp_and_brx_short(I5_savedSP, FP, Assembler::greaterEqualUnsigned, Assembler::pt, ok); __ stop("bad I5_savedSP value"); __ should_not_reach_here(); __ bind(ok); @@ -1429,8 +1388,7 @@ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); __ set_method_data_pointer_for_bcp(); - __ ba(false, profile_method_continue); - __ delayed()->nop(); + __ ba_short(profile_method_continue); } // handle invocation counter overflow @@ -1856,9 +1814,7 @@ // adapter frames in C2. 
Label caller_not_deoptimized; __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), I7); - __ tst(O0); - __ brx(Assembler::notEqual, false, Assembler::pt, caller_not_deoptimized); - __ delayed()->nop(); + __ br_notnull_short(O0, Assembler::pt, caller_not_deoptimized); const Register Gtmp1 = G3_scratch; const Register Gtmp2 = G1_scratch; @@ -1992,10 +1948,10 @@ void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, address& bep, address& cep, address& sep, address& aep, address& iep, address& lep, address& fep, address& dep, address& vep) { assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); Label L; - aep = __ pc(); __ push_ptr(); __ ba(false, L); __ delayed()->nop(); - fep = __ pc(); __ push_f(); __ ba(false, L); __ delayed()->nop(); - dep = __ pc(); __ push_d(); __ ba(false, L); __ delayed()->nop(); - lep = __ pc(); __ push_l(); __ ba(false, L); __ delayed()->nop(); + aep = __ pc(); __ push_ptr(); __ ba_short(L); + fep = __ pc(); __ push_f(); __ ba_short(L); + dep = __ pc(); __ push_d(); __ ba_short(L); + lep = __ pc(); __ push_l(); __ ba_short(L); iep = __ pc(); __ push_i(); bep = cep = sep = iep; // there aren't any vep = __ pc(); __ bind(L); // fall through diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/templateTable_sparc.cpp --- a/src/cpu/sparc/vm/templateTable_sparc.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/templateTable_sparc.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -159,13 +159,10 @@ if (JvmtiExport::can_post_breakpoint()) { Label fast_patch; __ ldub(at_bcp(0), Rscratch); - __ cmp(Rscratch, Bytecodes::_breakpoint); - __ br(Assembler::notEqual, false, Assembler::pt, fast_patch); - __ delayed()->nop(); // don't bother to hoist the stb here + __ cmp_and_br_short(Rscratch, Bytecodes::_breakpoint, Assembler::notEqual, Assembler::pt, fast_patch); // perform the quickening, slowly, in the bowels of the breakpoint table __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), Lmethod, Lbcp, Rbyte_code); - __ ba(false, patch_done); - __ delayed()->nop(); + __ ba_short(patch_done); __ bind(fast_patch); } #ifdef ASSERT @@ -281,17 +278,14 @@ // get type from tags __ add(O2, tags_offset, O2); __ ldub(O2, O1, O2); - __ cmp(O2, JVM_CONSTANT_UnresolvedString); // unresolved string? If so, must resolve - __ brx(Assembler::equal, true, Assembler::pt, call_ldc); - __ delayed()->nop(); - - __ cmp(O2, JVM_CONSTANT_UnresolvedClass); // unresolved class? If so, must resolve - __ brx(Assembler::equal, true, Assembler::pt, call_ldc); - __ delayed()->nop(); - - __ cmp(O2, JVM_CONSTANT_UnresolvedClassInError); // unresolved class in error state - __ brx(Assembler::equal, true, Assembler::pn, call_ldc); - __ delayed()->nop(); + // unresolved string? If so, must resolve + __ cmp_and_brx_short(O2, JVM_CONSTANT_UnresolvedString, Assembler::equal, Assembler::pt, call_ldc); + + // unresolved class? 
If so, must resolve + __ cmp_and_brx_short(O2, JVM_CONSTANT_UnresolvedClass, Assembler::equal, Assembler::pt, call_ldc); + + // unresolved class in error state + __ cmp_and_brx_short(O2, JVM_CONSTANT_UnresolvedClassInError, Assembler::equal, Assembler::pn, call_ldc); __ cmp(O2, JVM_CONSTANT_Class); // need to call vm to get java mirror of the class __ brx(Assembler::notEqual, true, Assembler::pt, notClass); @@ -301,8 +295,7 @@ __ set(wide, O1); call_VM(Otos_i, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), O1); __ push(atos); - __ ba(false, exit); - __ delayed()->nop(); + __ ba_short(exit); __ bind(notClass); // __ add(O0, base_offset, O0); @@ -312,8 +305,7 @@ __ delayed()->cmp(O2, JVM_CONSTANT_String); __ ld(O0, O1, Otos_i); __ push(itos); - __ ba(false, exit); - __ delayed()->nop(); + __ ba_short(exit); __ bind(notInt); // __ cmp(O2, JVM_CONSTANT_String); @@ -325,8 +317,7 @@ __ ld_ptr(O0, O1, Otos_i); __ verify_oop(Otos_i); __ push(atos); - __ ba(false, exit); - __ delayed()->nop(); + __ ba_short(exit); __ bind(notString); // __ ldf(FloatRegisterImpl::S, O0, O1, Ftos_f); @@ -365,9 +356,7 @@ __ load_klass(Otos_i, Rcon_klass); AddressLiteral array_klass_addr((address)Universe::systemObjArrayKlassObj_addr()); __ load_contents(array_klass_addr, Rarray_klass); - __ cmp(Rarray_klass, Rcon_klass); - __ brx(Assembler::notEqual, false, Assembler::pt, L_done); - __ delayed()->nop(); + __ cmp_and_brx_short(Rarray_klass, Rcon_klass, Assembler::notEqual, Assembler::pt, L_done); __ ld(Address(Otos_i, arrayOopDesc::length_offset_in_bytes()), Rcon_klass); __ tst(Rcon_klass); __ brx(Assembler::zero, true, Assembler::pt, L_done); @@ -397,9 +386,7 @@ __ sll(O1, LogBytesPerWord, O1); __ add(O0, O1, G3_scratch); - __ cmp(O2, JVM_CONSTANT_Double); - __ brx(Assembler::notEqual, false, Assembler::pt, Long); - __ delayed()->nop(); + __ cmp_and_brx_short(O2, JVM_CONSTANT_Double, Assembler::notEqual, Assembler::pt, Long); // A double can be placed at word-aligned locations in the constant pool. // Check out Conversions.java for an example. // Also constantPoolOopDesc::header_size() is 20, which makes it very difficult @@ -413,8 +400,7 @@ f->successor()); #endif __ push(dtos); - __ ba(false, exit); - __ delayed()->nop(); + __ ba_short(exit); __ bind(Long); #ifdef _LP64 @@ -453,9 +439,7 @@ // last two iloads in a pair. Comparing against fast_iload means that // the next bytecode is neither an iload or a caload, and therefore // an iload pair. - __ cmp(G3_scratch, (int)Bytecodes::_iload); - __ br(Assembler::equal, false, Assembler::pn, done); - __ delayed()->nop(); + __ cmp_and_br_short(G3_scratch, (int)Bytecodes::_iload, Assembler::equal, Assembler::pn, done); __ cmp(G3_scratch, (int)Bytecodes::_fast_iload); __ br(Assembler::equal, false, Assembler::pn, rewrite); @@ -697,9 +681,7 @@ aload(0); // if _getfield then wait with rewrite - __ cmp(G3_scratch, (int)Bytecodes::_getfield); - __ br(Assembler::equal, false, Assembler::pn, done); - __ delayed()->nop(); + __ cmp_and_br_short(G3_scratch, (int)Bytecodes::_getfield, Assembler::equal, Assembler::pn, done); // if _igetfield then rewrite to _fast_iaccess_0 assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "adjust fast bytecode def"); @@ -867,8 +849,7 @@ __ index_check_without_pop(O3, O2, UseCompressedOops ? 
2 : LogBytesPerWord, G3_scratch, O1); // do array store check - check for NULL value first - __ br_null( Otos_i, false, Assembler::pn, is_null ); - __ delayed()->nop(); + __ br_null_short( Otos_i, Assembler::pn, is_null ); __ load_klass(O3, O4); // get array klass __ load_klass(Otos_i, O5); // get value klass @@ -899,7 +880,7 @@ __ bind(store_ok); do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Otos_i, G3_scratch, _bs->kind(), true); - __ ba(false,done); + __ ba(done); __ delayed()->inc(Lesp, 3* Interpreter::stackElementSize); // adj sp (pops array, index and value) __ bind(is_null); @@ -1633,16 +1614,14 @@ if (ProfileInterpreter) { // If no method data exists, go to profile_continue. __ ld_ptr(Lmethod, methodOopDesc::method_data_offset(), G4_scratch); - __ br_null(G4_scratch, false, Assembler::pn, Lno_mdo); - __ delayed()->nop(); + __ br_null_short(G4_scratch, Assembler::pn, Lno_mdo); // Increment backedge counter in the MDO Address mdo_backedge_counter(G4_scratch, in_bytes(methodDataOopDesc::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset())); __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, G3_scratch, Lscratch, Assembler::notZero, &Lforward); - __ ba(false, Loverflow); - __ delayed()->nop(); + __ ba_short(Loverflow); } // If there's no MDO, increment counter in methodOop @@ -1658,14 +1637,11 @@ // Was an OSR adapter generated? // O0 = osr nmethod - __ br_null(O0, false, Assembler::pn, Lforward); - __ delayed()->nop(); + __ br_null_short(O0, Assembler::pn, Lforward); // Has the nmethod been invalidated already? __ ld(O0, nmethod::entry_bci_offset(), O2); - __ cmp(O2, InvalidOSREntryBci); - __ br(Assembler::equal, false, Assembler::pn, Lforward); - __ delayed()->nop(); + __ cmp_and_br_short(O2, InvalidOSREntryBci, Assembler::equal, Assembler::pn, Lforward); // migrate the interpreter frame off of the stack @@ -1830,7 +1806,7 @@ __ profile_switch_case(O2, O3, G3_scratch, G4_scratch); __ sll(O2, LogBytesPerInt, O2); __ add(O2, 3 * BytesPerInt, O2); - __ ba(false, continue_execution); + __ ba(continue_execution); __ delayed()->ld(O1, O2, O2); // handle default __ bind(default_case); @@ -1858,7 +1834,7 @@ __ ld(O1, BytesPerInt, O2); __ sll(O2, LogBytesPerInt + 1, O2); // in word-pairs __ add(O1, 2 * BytesPerInt, O3); // set first pair addr - __ ba(false, loop_entry); + __ ba(loop_entry); __ delayed()->add(O3, O2, O2); // counter now points past last pair // table search @@ -1877,8 +1853,7 @@ __ ld(O1, 0, O4); // get default offset if (ProfileInterpreter) { __ profile_switch_default(O3); - __ ba(false, continue_execution); - __ delayed()->nop(); + __ ba_short(continue_execution); } // entry found -> get offset @@ -1944,7 +1919,7 @@ // and start Label entry; - __ ba(false, entry); + __ ba(entry); __ delayed()->ld( Rarray, -BytesPerInt, Rj); // (Rj is already in the native byte-ordering.) @@ -2002,8 +1977,7 @@ // (Rj is already in the native byte-ordering.) 
if (ProfileInterpreter) { - __ ba(false, continue_execution); - __ delayed()->nop(); + __ ba_short(continue_execution); } __ bind(default_case); // fall through (if not profiling) @@ -2216,9 +2190,7 @@ assert_different_registers(Rcache, index, G1_scratch); AddressLiteral get_field_access_count_addr(JvmtiExport::get_field_access_count_addr()); __ load_contents(get_field_access_count_addr, G1_scratch); - __ tst(G1_scratch); - __ br(Assembler::zero, false, Assembler::pt, Label1); - __ delayed()->nop(); + __ cmp_and_br_short(G1_scratch, 0, Assembler::equal, Assembler::pt, Label1); __ add(Rcache, in_bytes(cp_base_offset), Rcache); @@ -2298,7 +2270,7 @@ if (!is_static) { patch_bytecode(Bytecodes::_fast_agetfield, G3_scratch, G4_scratch); } - __ ba(false, checkVolatile); + __ ba(checkVolatile); __ delayed()->tst(Lscratch); __ bind(notObj); @@ -2313,7 +2285,7 @@ if (!is_static) { patch_bytecode(Bytecodes::_fast_igetfield, G3_scratch, G4_scratch); } - __ ba(false, checkVolatile); + __ ba(checkVolatile); __ delayed()->tst(Lscratch); __ bind(notInt); @@ -2329,7 +2301,7 @@ if (!is_static) { patch_bytecode(Bytecodes::_fast_lgetfield, G3_scratch, G4_scratch); } - __ ba(false, checkVolatile); + __ ba(checkVolatile); __ delayed()->tst(Lscratch); __ bind(notLong); @@ -2344,7 +2316,7 @@ if (!is_static) { patch_bytecode(Bytecodes::_fast_bgetfield, G3_scratch, G4_scratch); } - __ ba(false, checkVolatile); + __ ba(checkVolatile); __ delayed()->tst(Lscratch); __ bind(notByte); @@ -2359,7 +2331,7 @@ if (!is_static) { patch_bytecode(Bytecodes::_fast_cgetfield, G3_scratch, G4_scratch); } - __ ba(false, checkVolatile); + __ ba(checkVolatile); __ delayed()->tst(Lscratch); __ bind(notChar); @@ -2374,7 +2346,7 @@ if (!is_static) { patch_bytecode(Bytecodes::_fast_sgetfield, G3_scratch, G4_scratch); } - __ ba(false, checkVolatile); + __ ba(checkVolatile); __ delayed()->tst(Lscratch); __ bind(notShort); @@ -2390,7 +2362,7 @@ if (!is_static) { patch_bytecode(Bytecodes::_fast_fgetfield, G3_scratch, G4_scratch); } - __ ba(false, checkVolatile); + __ ba(checkVolatile); __ delayed()->tst(Lscratch); __ bind(notFloat); @@ -2499,9 +2471,7 @@ Label done; AddressLiteral get_field_modification_count_addr(JvmtiExport::get_field_modification_count_addr()); __ load_contents(get_field_modification_count_addr, G4_scratch); - __ tst(G4_scratch); - __ br(Assembler::zero, false, Assembler::pt, done); - __ delayed()->nop(); + __ cmp_and_br_short(G4_scratch, 0, Assembler::equal, Assembler::pt, done); __ pop_ptr(G4_scratch); // copy the object pointer from tos __ verify_oop(G4_scratch); __ push_ptr(G4_scratch); // put the object pointer back on tos @@ -2552,9 +2522,7 @@ assert_different_registers(Rcache, index, G1_scratch); AddressLiteral get_field_modification_count_addr(JvmtiExport::get_field_modification_count_addr()); __ load_contents(get_field_modification_count_addr, G1_scratch); - __ tst(G1_scratch); - __ br(Assembler::zero, false, Assembler::pt, Label1); - __ delayed()->nop(); + __ cmp_and_br_short(G1_scratch, 0, Assembler::zero, Assembler::pt, Label1); // The Rcache and index registers have been already set. 
// This allows to eliminate this call but the Rcache and index @@ -2584,8 +2552,7 @@ __ br(Assembler::equal, false, Assembler::pt, two_word); __ delayed()->nop(); __ inc(G4_scratch, Interpreter::expr_offset_in_bytes(1)); - __ br(Assembler::always, false, Assembler::pt, valsizeknown); - __ delayed()->nop(); + __ ba_short(valsizeknown); __ bind(two_word); __ inc(G4_scratch, Interpreter::expr_offset_in_bytes(2)); @@ -2636,9 +2603,7 @@ __ and3(Rflags, Lscratch, Lscratch); if (__ membar_has_effect(read_bits)) { - __ tst(Lscratch); - __ br(Assembler::zero, false, Assembler::pt, notVolatile); - __ delayed()->nop(); + __ cmp_and_br_short(Lscratch, 0, Assembler::equal, Assembler::pt, notVolatile); volatile_barrier(read_bits); __ bind(notVolatile); } @@ -2663,7 +2628,7 @@ do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false); - __ ba(false, checkVolatile); + __ ba(checkVolatile); __ delayed()->tst(Lscratch); __ bind(notObj); @@ -2675,7 +2640,7 @@ // itos __ pop_i(); __ st(Otos_i, Rclass, Roffset); - __ ba(false, checkVolatile); + __ ba(checkVolatile); __ delayed()->tst(Lscratch); __ bind(notInt); @@ -2691,7 +2656,7 @@ pop_and_check_object(Rclass); __ st(Otos_i, Rclass, Roffset); patch_bytecode(Bytecodes::_fast_iputfield, G3_scratch, G4_scratch); - __ ba(false, checkVolatile); + __ ba(checkVolatile); __ delayed()->tst(Lscratch); __ bind(notInt); @@ -2707,7 +2672,7 @@ do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false); patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch); - __ ba(false, checkVolatile); + __ ba(checkVolatile); __ delayed()->tst(Lscratch); __ bind(notObj); @@ -2724,7 +2689,7 @@ if (!is_static) { patch_bytecode(Bytecodes::_fast_bputfield, G3_scratch, G4_scratch); } - __ ba(false, checkVolatile); + __ ba(checkVolatile); __ delayed()->tst(Lscratch); __ bind(notByte); @@ -2740,7 +2705,7 @@ if (!is_static) { patch_bytecode(Bytecodes::_fast_lputfield, G3_scratch, G4_scratch); } - __ ba(false, checkVolatile); + __ ba(checkVolatile); __ delayed()->tst(Lscratch); __ bind(notLong); @@ -2756,7 +2721,7 @@ if (!is_static) { patch_bytecode(Bytecodes::_fast_cputfield, G3_scratch, G4_scratch); } - __ ba(false, checkVolatile); + __ ba(checkVolatile); __ delayed()->tst(Lscratch); __ bind(notChar); @@ -2771,7 +2736,7 @@ if (!is_static) { patch_bytecode(Bytecodes::_fast_sputfield, G3_scratch, G4_scratch); } - __ ba(false, checkVolatile); + __ ba(checkVolatile); __ delayed()->tst(Lscratch); __ bind(notShort); @@ -2786,7 +2751,7 @@ if (!is_static) { patch_bytecode(Bytecodes::_fast_fputfield, G3_scratch, G4_scratch); } - __ ba(false, checkVolatile); + __ ba(checkVolatile); __ delayed()->tst(Lscratch); __ bind(notFloat); @@ -2833,9 +2798,7 @@ __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch); __ and3(Rflags, Lscratch, Lscratch); if (__ membar_has_effect(read_bits)) { - __ tst(Lscratch); - __ br(Assembler::zero, false, Assembler::pt, notVolatile); - __ delayed()->nop(); + __ cmp_and_br_short(Lscratch, 0, Assembler::equal, Assembler::pt, notVolatile); volatile_barrier(read_bits); __ bind(notVolatile); } @@ -2864,9 +2827,7 @@ } if (__ membar_has_effect(write_bits)) { - __ tst(Lscratch); - __ br(Assembler::zero, false, Assembler::pt, exit); - __ delayed()->nop(); + __ cmp_and_br_short(Lscratch, 0, Assembler::equal, Assembler::pt, exit); volatile_barrier(Assembler::StoreLoad); __ bind(exit); } @@ -3226,8 +3187,7 @@ // the VM should throw IncompatibleClassChangeError. 
linkResolver checks // this too but that's only if the entry isn't already resolved, so we // need to check again. - __ br_notnull( Rtemp, false, Assembler::pt, ok); - __ delayed()->nop(); + __ br_notnull_short( Rtemp, Assembler::pt, ok); call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeError)); __ should_not_reach_here(); __ bind(ok); @@ -3251,9 +3211,7 @@ // Check for abstract method error. { Label ok; - __ tst(G5_method); - __ brx(Assembler::notZero, false, Assembler::pt, ok); - __ delayed()->nop(); + __ br_notnull_short(G5_method, Assembler::pt, ok); call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); __ should_not_reach_here(); __ bind(ok); @@ -3408,17 +3366,14 @@ #else __ srl(RfreeValue, LogHeapWordSize, RfreeValue); #endif - __ cmp(RtlabWasteLimitValue, RfreeValue); - __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, slow_case); // tlab waste is small - __ delayed()->nop(); + __ cmp_and_brx_short(RtlabWasteLimitValue, RfreeValue, Assembler::greaterEqualUnsigned, Assembler::pt, slow_case); // tlab waste is small // increment waste limit to prevent getting stuck on this slow path __ add(RtlabWasteLimitValue, ThreadLocalAllocBuffer::refill_waste_limit_increment(), RtlabWasteLimitValue); __ st_ptr(RtlabWasteLimitValue, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); } else { // No allocation in the shared eden. - __ br(Assembler::always, false, Assembler::pt, slow_case); - __ delayed()->nop(); + __ ba_short(slow_case); } } @@ -3440,18 +3395,14 @@ // RnewTopValue contains the top address after the new object // has been allocated. - __ cmp(RnewTopValue, RendValue); - __ brx(Assembler::greaterUnsigned, false, Assembler::pn, slow_case); - __ delayed()->nop(); + __ cmp_and_brx_short(RnewTopValue, RendValue, Assembler::greaterUnsigned, Assembler::pn, slow_case); __ casx_under_lock(RtopAddr, RoldTopValue, RnewTopValue, VM_Version::v9_instructions_work() ? 
NULL : (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); // if someone beat us on the allocation, try again, otherwise continue - __ cmp(RoldTopValue, RnewTopValue); - __ brx(Assembler::notEqual, false, Assembler::pn, retry); - __ delayed()->nop(); + __ cmp_and_brx_short(RoldTopValue, RnewTopValue, Assembler::notEqual, Assembler::pn, retry); // bump total bytes allocated by this thread // RoldTopValue and RtopAddr are dead, so can use G1 and G3 @@ -3474,8 +3425,7 @@ __ br(Assembler::notEqual, false, Assembler::pt, loop); __ delayed()->subcc(Roffset, wordSize, Roffset); } - __ br(Assembler::always, false, Assembler::pt, initialize_header); - __ delayed()->nop(); + __ ba_short(initialize_header); } // slow case @@ -3485,8 +3435,7 @@ call_VM(Otos_i, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), O1, O2); - __ ba(false, done); - __ delayed()->nop(); + __ ba_short(done); // Initialize the header: mark, klass __ bind(initialize_header); @@ -3550,8 +3499,7 @@ Register RspecifiedKlass = O4; // Check for casting a NULL - __ br_null(Otos_i, false, Assembler::pn, is_null); - __ delayed()->nop(); + __ br_null_short(Otos_i, Assembler::pn, is_null); // Get value klass in RobjKlass __ load_klass(Otos_i, RobjKlass); // get value klass @@ -3571,8 +3519,7 @@ call_VM(RspecifiedKlass, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc) ); __ pop_ptr(Otos_i, G3_scratch); // restore receiver - __ br(Assembler::always, false, Assembler::pt, resolved); - __ delayed()->nop(); + __ ba_short(resolved); // Extract target class from constant pool __ bind(quicked); @@ -3591,8 +3538,7 @@ __ bind(cast_ok); if (ProfileInterpreter) { - __ ba(false, done); - __ delayed()->nop(); + __ ba_short(done); } __ bind(is_null); __ profile_null_seen(G3_scratch); @@ -3608,8 +3554,7 @@ Register RspecifiedKlass = O4; // Check for casting a NULL - __ br_null(Otos_i, false, Assembler::pt, is_null); - __ delayed()->nop(); + __ br_null_short(Otos_i, Assembler::pt, is_null); // Get value klass in RobjKlass __ load_klass(Otos_i, RobjKlass); // get value klass @@ -3629,9 +3574,7 @@ call_VM(RspecifiedKlass, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc) ); __ pop_ptr(Otos_i, G3_scratch); // restore receiver - __ br(Assembler::always, false, Assembler::pt, resolved); - __ delayed()->nop(); - + __ ba_short(resolved); // Extract target class from constant pool __ bind(quicked); @@ -3649,8 +3592,7 @@ __ clr( Otos_i ); if (ProfileInterpreter) { - __ ba(false, done); - __ delayed()->nop(); + __ ba_short(done); } __ bind(is_null); __ profile_null_seen(G3_scratch); @@ -3724,7 +3666,7 @@ { Label entry, loop, exit; __ add( __ top_most_monitor(), O2 ); // last one to check - __ ba( false, entry ); + __ ba( entry ); __ delayed()->mov( Lmonitors, O3 ); // first one to check @@ -3757,8 +3699,7 @@ { Label allocated; // found free slot? - __ br_notnull(O1, false, Assembler::pn, allocated); - __ delayed()->nop(); + __ br_notnull_short(O1, Assembler::pn, allocated); __ add_monitor_to_stack( false, O2, O3 ); __ mov(Lmonitors, O1); @@ -3791,7 +3732,7 @@ { Label entry, loop, found; __ add( __ top_most_monitor(), O2 ); // last one to check - __ ba(false, entry ); + __ ba(entry); // use Lscratch to hold monitor elem to check, start with most recent monitor, // By using a local it survives the call to the C routine. 
__ delayed()->mov( Lmonitors, Lscratch ); diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/vm_version_sparc.cpp --- a/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -59,6 +59,11 @@ assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); + if (AllocatePrefetchStyle == 3 && !has_blk_init()) { + warning("BIS instructions are not available on this CPU"); + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); + } + UseSSE = 0; // Only on x86 and x64 _supports_cx8 = has_v9(); @@ -116,27 +121,44 @@ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { FLAG_SET_DEFAULT(UsePopCountInstruction, true); } + } else if (UsePopCountInstruction) { + warning("POPC instruction is not available on this CPU"); + FLAG_SET_DEFAULT(UsePopCountInstruction, false); + } + + // T4 and newer Sparc cpus have a new compare and branch instruction. + if (has_cbcond()) { + if (FLAG_IS_DEFAULT(UseCBCond)) { + FLAG_SET_DEFAULT(UseCBCond, true); + } + } else if (UseCBCond) { + warning("CBCOND instruction is not available on this CPU"); + FLAG_SET_DEFAULT(UseCBCond, false); } #ifdef COMPILER2 + // T4 and newer Sparc cpus have fast RDPC. + if (has_fast_rdpc() && FLAG_IS_DEFAULT(UseRDPCForConstantTableBase)) { +// FLAG_SET_DEFAULT(UseRDPCForConstantTableBase, true); + } + // Currently not supported anywhere. FLAG_SET_DEFAULT(UseFPUForSpilling, false); #endif char buf[512]; - jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", - (has_v8() ? ", has_v8" : ""), - (has_v9() ? ", has_v9" : ""), + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")), (has_hardware_popc() ? ", popc" : ""), - (has_vis1() ? ", has_vis1" : ""), - (has_vis2() ? ", has_vis2" : ""), - (has_vis3() ? ", has_vis3" : ""), - (has_blk_init() ? ", has_blk_init" : ""), - (is_ultra3() ? ", is_ultra3" : ""), - (is_sun4v() ? ", is_sun4v" : ""), - (is_niagara() ? ", is_niagara" : ""), - (is_niagara_plus() ? ", is_niagara_plus" : ""), - (is_sparc64() ? ", is_sparc64" : ""), + (has_vis1() ? ", vis1" : ""), + (has_vis2() ? ", vis2" : ""), + (has_vis3() ? ", vis3" : ""), + (has_blk_init() ? ", blk_init" : ""), + (has_cbcond() ? ", cbcond" : ""), + (is_ultra3() ? ", ultra3" : ""), + (is_sun4v() ? ", sun4v" : ""), + (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")), + (is_sparc64() ? ", sparc64" : ""), (!has_hardware_mul32() ? ", no-mul32" : ""), (!has_hardware_div32() ? ", no-div32" : ""), (!has_hardware_fsmuld() ? 
", no-fsmuld" : "")); diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/vm_version_sparc.hpp --- a/src/cpu/sparc/vm/vm_version_sparc.hpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/vm_version_sparc.hpp Thu Jul 21 11:25:07 2011 -0700 @@ -31,44 +31,46 @@ class VM_Version: public Abstract_VM_Version { protected: enum Feature_Flag { - v8_instructions = 0, - hardware_mul32 = 1, - hardware_div32 = 2, - hardware_fsmuld = 3, - hardware_popc = 4, - v9_instructions = 5, - vis1_instructions = 6, - vis2_instructions = 7, - sun4v_instructions = 8, + v8_instructions = 0, + hardware_mul32 = 1, + hardware_div32 = 2, + hardware_fsmuld = 3, + hardware_popc = 4, + v9_instructions = 5, + vis1_instructions = 6, + vis2_instructions = 7, + sun4v_instructions = 8, blk_init_instructions = 9, - fmaf_instructions = 10, - fmau_instructions = 11, - vis3_instructions = 12, - sparc64_family = 13, - T_family = 14, - T1_model = 15 + fmaf_instructions = 10, + fmau_instructions = 11, + vis3_instructions = 12, + sparc64_family = 13, + T_family = 14, + T1_model = 15, + cbcond_instructions = 16 }; enum Feature_Flag_Set { unknown_m = 0, all_features_m = -1, - v8_instructions_m = 1 << v8_instructions, - hardware_mul32_m = 1 << hardware_mul32, - hardware_div32_m = 1 << hardware_div32, - hardware_fsmuld_m = 1 << hardware_fsmuld, - hardware_popc_m = 1 << hardware_popc, - v9_instructions_m = 1 << v9_instructions, - vis1_instructions_m = 1 << vis1_instructions, - vis2_instructions_m = 1 << vis2_instructions, - sun4v_m = 1 << sun4v_instructions, + v8_instructions_m = 1 << v8_instructions, + hardware_mul32_m = 1 << hardware_mul32, + hardware_div32_m = 1 << hardware_div32, + hardware_fsmuld_m = 1 << hardware_fsmuld, + hardware_popc_m = 1 << hardware_popc, + v9_instructions_m = 1 << v9_instructions, + vis1_instructions_m = 1 << vis1_instructions, + vis2_instructions_m = 1 << vis2_instructions, + sun4v_m = 1 << sun4v_instructions, blk_init_instructions_m = 1 << blk_init_instructions, - fmaf_instructions_m = 1 << fmaf_instructions, - fmau_instructions_m = 1 << fmau_instructions, - vis3_instructions_m = 1 << vis3_instructions, - sparc64_family_m = 1 << sparc64_family, - T_family_m = 1 << T_family, - T1_model_m = 1 << T1_model, + fmaf_instructions_m = 1 << fmaf_instructions, + fmau_instructions_m = 1 << fmau_instructions, + vis3_instructions_m = 1 << vis3_instructions, + sparc64_family_m = 1 << sparc64_family, + T_family_m = 1 << T_family, + T1_model_m = 1 << T1_model, + cbcond_instructions_m = 1 << cbcond_instructions, generic_v8_m = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m, generic_v9_m = generic_v8_m | v9_instructions_m, @@ -111,20 +113,25 @@ static bool has_vis2() { return (_features & vis2_instructions_m) != 0; } static bool has_vis3() { return (_features & vis3_instructions_m) != 0; } static bool has_blk_init() { return (_features & blk_init_instructions_m) != 0; } + static bool has_cbcond() { return (_features & cbcond_instructions_m) != 0; } static bool supports_compare_and_exchange() { return has_v9(); } - static bool is_ultra3() { return (_features & ultra3_m) == ultra3_m; } - static bool is_sun4v() { return (_features & sun4v_m) != 0; } // Returns true if the platform is in the niagara line (T series) // and newer than the niagara1. 
static bool is_niagara_plus() { return is_T_family(_features) && !is_T1_model(_features); } + // Fujitsu SPARC64 static bool is_sparc64() { return (_features & sparc64_family_m) != 0; } + static bool is_sun4v() { return (_features & sun4v_m) != 0; } + static bool is_ultra3() { return (_features & ultra3_m) == ultra3_m && !is_sun4v() && !is_sparc64(); } + static bool has_fast_fxtof() { return is_niagara() || is_sparc64() || has_v9() && !is_ultra3(); } static bool has_fast_idiv() { return is_niagara_plus() || is_sparc64(); } + // T4 and newer Sparc have fast RDPC instruction. + static bool has_fast_rdpc() { return is_niagara_plus() && has_cbcond(); } static const char* cpu_features() { return _features_str; } diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/sparc/vm/vtableStubs_sparc.cpp --- a/src/cpu/sparc/vm/vtableStubs_sparc.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/sparc/vm/vtableStubs_sparc.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -76,9 +76,7 @@ Label L; // check offset vs vtable length __ ld(G3_scratch, instanceKlass::vtable_length_offset()*wordSize, G5); - __ cmp(G5, vtable_index*vtableEntry::size()); - __ br(Assembler::greaterUnsigned, false, Assembler::pt, L); - __ delayed()->nop(); + __ cmp_and_br_short(G5, vtable_index*vtableEntry::size(), Assembler::greaterUnsigned, Assembler::pt, L); __ set(vtable_index, O2); __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), O0, O2); __ bind(L); @@ -95,8 +93,7 @@ #ifndef PRODUCT if (DebugVtables) { Label L; - __ br_notnull(G5_method, false, Assembler::pt, L); - __ delayed()->nop(); + __ br_notnull_short(G5_method, Assembler::pt, L); __ stop("Vtable entry is ZERO"); __ bind(L); } @@ -177,8 +174,7 @@ #ifndef PRODUCT if (DebugVtables) { Label L01; - __ bpr(Assembler::rc_nz, false, Assembler::pt, L5_method, L01); - __ delayed()->nop(); + __ br_notnull_short(L5_method, Assembler::pt, L01); __ stop("methodOop is null"); __ bind(L01); __ verify_oop(L5_method); diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/x86/vm/x86_32.ad --- a/src/cpu/x86/vm/x86_32.ad Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/x86/vm/x86_32.ad Thu Jul 21 11:25:07 2011 -0700 @@ -1713,14 +1713,14 @@ else emit_d32(cbuf,con); %} - enc_class Lbl (label labl) %{ // JMP, CALL + enc_class Lbl (label labl) %{ // GOTO Label *l = $labl$$label; - emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0); - %} - - enc_class LblShort (label labl) %{ // JMP, CALL + emit_d32(cbuf, (l->loc_pos() - (cbuf.insts_size()+4))); + %} + + enc_class LblShort (label labl) %{ // GOTO Label *l = $labl$$label; - int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0; + int disp = l->loc_pos() - (cbuf.insts_size()+1); assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); emit_d8(cbuf, disp); %} @@ -1751,13 +1751,13 @@ Label *l = $labl$$label; $$$emit8$primary; emit_cc(cbuf, $secondary, $cop$$cmpcode); - emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0); + emit_d32(cbuf, (l->loc_pos() - (cbuf.insts_size()+4))); %} enc_class JccShort (cmpOp cop, label labl) %{ // JCC Label *l = $labl$$label; emit_cc(cbuf, $primary, $cop$$cmpcode); - int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0; + int disp = l->loc_pos() - (cbuf.insts_size()+1); assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); emit_d8(cbuf, disp); %} @@ -13172,7 +13172,7 @@ bool ok = false; if ($cop$$cmpcode == Assembler::notEqual) { // the two jumps 6 bytes apart so the jump distances are too - parity_disp = l ? 
(l->loc_pos() - (cbuf.insts_size() + 4)) : 0; + parity_disp = l->loc_pos() - (cbuf.insts_size() + 4); } else if ($cop$$cmpcode == Assembler::equal) { parity_disp = 6; ok = true; @@ -13182,7 +13182,7 @@ emit_d32(cbuf, parity_disp); $$$emit8$primary; emit_cc(cbuf, $secondary, $cop$$cmpcode); - int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0; + int disp = l->loc_pos() - (cbuf.insts_size() + 4); emit_d32(cbuf, disp); %} ins_pipe(pipe_jcc); @@ -13368,7 +13368,7 @@ emit_cc(cbuf, $primary, Assembler::parity); int parity_disp = -1; if ($cop$$cmpcode == Assembler::notEqual) { - parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0; + parity_disp = l->loc_pos() - (cbuf.insts_size() + 1); } else if ($cop$$cmpcode == Assembler::equal) { parity_disp = 2; } else { @@ -13376,7 +13376,7 @@ } emit_d8(cbuf, parity_disp); emit_cc(cbuf, $primary, $cop$$cmpcode); - int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0; + int disp = l->loc_pos() - (cbuf.insts_size() + 1); emit_d8(cbuf, disp); assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp"); diff -r 6a991dcb52bb -r 3d42f82cd811 src/cpu/x86/vm/x86_64.ad --- a/src/cpu/x86/vm/x86_64.ad Thu Jul 21 08:38:25 2011 -0700 +++ b/src/cpu/x86/vm/x86_64.ad Thu Jul 21 11:25:07 2011 -0700 @@ -2428,16 +2428,16 @@ enc_class Lbl(label labl) %{ - // JMP, CALL + // GOTO Label* l = $labl$$label; - emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0); + emit_d32(cbuf, (l->loc_pos() - (cbuf.insts_size() + 4))); %} enc_class LblShort(label labl) %{ - // JMP, CALL + // GOTO Label* l = $labl$$label; - int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0; + int disp = l->loc_pos() - (cbuf.insts_size() + 1); assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); emit_d8(cbuf, disp); %} @@ -2466,7 +2466,7 @@ Label* l = $labl$$label; $$$emit8$primary; emit_cc(cbuf, $secondary, $cop$$cmpcode); - emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0); + emit_d32(cbuf, (l->loc_pos() - (cbuf.insts_size() + 4))); %} enc_class JccShort (cmpOp cop, label labl) @@ -2474,7 +2474,7 @@ // JCC Label *l = $labl$$label; emit_cc(cbuf, $primary, $cop$$cmpcode); - int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0; + int disp = l->loc_pos() - (cbuf.insts_size() + 1); assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); emit_d8(cbuf, disp); %} @@ -12131,7 +12131,7 @@ int parity_disp = -1; if ($cop$$cmpcode == Assembler::notEqual) { // the two jumps 6 bytes apart so the jump distances are too - parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0; + parity_disp = l->loc_pos() - (cbuf.insts_size() + 4); } else if ($cop$$cmpcode == Assembler::equal) { parity_disp = 6; } else { @@ -12140,7 +12140,7 @@ emit_d32(cbuf, parity_disp); $$$emit8$primary; emit_cc(cbuf, $secondary, $cop$$cmpcode); - int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0; + int disp = l->loc_pos() - (cbuf.insts_size() + 4); emit_d32(cbuf, disp); %} ins_pipe(pipe_jcc); @@ -12335,7 +12335,7 @@ emit_cc(cbuf, $primary, Assembler::parity); int parity_disp = -1; if ($cop$$cmpcode == Assembler::notEqual) { - parity_disp = l ? 
(l->loc_pos() - (cbuf.insts_size() + 1)) : 0; + parity_disp = l->loc_pos() - (cbuf.insts_size() + 1); } else if ($cop$$cmpcode == Assembler::equal) { parity_disp = 2; } else { @@ -12343,7 +12343,7 @@ } emit_d8(cbuf, parity_disp); emit_cc(cbuf, $primary, $cop$$cmpcode); - int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0; + int disp = l->loc_pos() - (cbuf.insts_size() + 1); emit_d8(cbuf, disp); assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp"); diff -r 6a991dcb52bb -r 3d42f82cd811 src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp --- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -114,6 +114,11 @@ #endif if (av & AV_SPARC_VIS3) features |= vis3_instructions_m; +#ifndef AV_SPARC_CBCOND +#define AV_SPARC_CBCOND 0x10000000 /* compare and branch instrs supported */ +#endif + if (av & AV_SPARC_CBCOND) features |= cbcond_instructions_m; + } else { // getisax(2) failed, use the old legacy code. #ifndef PRODUCT diff -r 6a991dcb52bb -r 3d42f82cd811 src/share/vm/adlc/formssel.cpp --- a/src/share/vm/adlc/formssel.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/share/vm/adlc/formssel.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -3623,7 +3623,27 @@ assert( mNode2->_opType, "Must have _opType"); const Form *form = globals[_opType]; const Form *form2 = globals[mNode2->_opType]; - return (form == form2); + if( form != form2 ) { + return false; + } + + // Check that their children also match + if (_lChild ) { + if( !_lChild->equivalent(globals, mNode2->_lChild) ) + return false; + } else if (mNode2->_lChild) { + return false; // I have NULL left child, mNode2 has non-NULL left child. + } + + if (_rChild ) { + if( !_rChild->equivalent(globals, mNode2->_rChild) ) + return false; + } else if (mNode2->_rChild) { + return false; // I have NULL right child, mNode2 has non-NULL right child. + } + + // We've made it through the gauntlet. 
+ return true; } //-------------------------- has_commutative_op ------------------------------- diff -r 6a991dcb52bb -r 3d42f82cd811 src/share/vm/adlc/output_c.cpp --- a/src/share/vm/adlc/output_c.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/share/vm/adlc/output_c.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -3088,10 +3088,10 @@ int label_position = instr->label_position(); if( label_position != -1 ) { // Set the label - fprintf(fp,"void %sNode::label_set( Label& label, uint block_num ) {\n", instr->_ident); + fprintf(fp,"void %sNode::label_set( Label* label, uint block_num ) {\n", instr->_ident); fprintf(fp," labelOper* oper = (labelOper*)(opnd_array(%d));\n", label_position ); - fprintf(fp," oper->_label = &label;\n"); + fprintf(fp," oper->_label = label;\n"); fprintf(fp," oper->_block_num = block_num;\n"); fprintf(fp,"}\n"); } diff -r 6a991dcb52bb -r 3d42f82cd811 src/share/vm/adlc/output_h.cpp --- a/src/share/vm/adlc/output_h.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/share/vm/adlc/output_h.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -1520,7 +1520,7 @@ int label_position = instr->label_position(); if( label_position != -1 ) { // Set the label, stored in labelOper::_branch_label - fprintf(fp," virtual void label_set( Label& label, uint block_num );\n"); + fprintf(fp," virtual void label_set( Label* label, uint block_num );\n"); } // If this instruction contains a methodOper diff -r 6a991dcb52bb -r 3d42f82cd811 src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/share/vm/opto/compile.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -517,7 +517,17 @@ buf.stubs()->initialize_shared_locs( &locs_buf[lsize * 2], lsize); // Do the emission. + + Label fakeL; // Fake label for branch instructions. + bool is_branch = n->is_Branch() && n->as_Mach()->ideal_Opcode() != Op_Jump; + if (is_branch) { + MacroAssembler masm(&buf); + masm.bind(fakeL); + n->as_Mach()->label_set(&fakeL, 0); + } n->emit(buf, this->regalloc()); + if (is_branch) // Clear the reference to fake label. + n->as_Mach()->label_set(NULL, 0); // End scratch_emit_size section. set_in_scratch_emit_size(false); diff -r 6a991dcb52bb -r 3d42f82cd811 src/share/vm/opto/machnode.cpp --- a/src/share/vm/opto/machnode.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/share/vm/opto/machnode.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -409,7 +409,7 @@ //------------------------------label_set-------------------------------------- // Set the Label for a LabelOper, if an operand for this instruction -void MachNode::label_set( Label& label, uint block_num ) { +void MachNode::label_set( Label* label, uint block_num ) { ShouldNotCallThis(); } @@ -514,6 +514,9 @@ void MachNullCheckNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { // only emits entries in the null-pointer exception handler table } +void MachNullCheckNode::label_set(Label* label, uint block_num) { + // Nothing to emit +} const RegMask &MachNullCheckNode::in_RegMask( uint idx ) const { if( idx == 0 ) return RegMask::Empty; diff -r 6a991dcb52bb -r 3d42f82cd811 src/share/vm/opto/machnode.hpp --- a/src/share/vm/opto/machnode.hpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/share/vm/opto/machnode.hpp Thu Jul 21 11:25:07 2011 -0700 @@ -282,7 +282,7 @@ virtual int ideal_Opcode() const { return Op_Node; } // Set the branch inside jump MachNodes. Error for non-branch Nodes. 
- virtual void label_set( Label& label, uint block_num ); + virtual void label_set( Label* label, uint block_num ); // Adds the label for the case virtual void add_case_label( int switch_val, Label* blockLabel); @@ -531,6 +531,7 @@ } virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const; + virtual void label_set(Label* label, uint block_num); virtual bool pinned() const { return true; }; virtual void negate() { } virtual const class Type *bottom_type() const { return TypeTuple::IFBOTH; } @@ -853,7 +854,7 @@ virtual MachOper *clone(Compile* C) const; - virtual Label *label() const { return _label; } + virtual Label *label() const { assert(_label != NULL, "need Label"); return _label; } virtual uint opcode() const; diff -r 6a991dcb52bb -r 3d42f82cd811 src/share/vm/opto/output.cpp --- a/src/share/vm/opto/output.cpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/share/vm/opto/output.cpp Thu Jul 21 11:25:07 2011 -0700 @@ -1346,7 +1346,7 @@ // For Branchs // This requires the TRUE branch target be in succs[0] uint block_num = b->non_connector_successor(0)->_pre_order; - mach->label_set( blk_labels[block_num], block_num ); + mach->label_set( &blk_labels[block_num], block_num ); } } diff -r 6a991dcb52bb -r 3d42f82cd811 src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Thu Jul 21 08:38:25 2011 -0700 +++ b/src/share/vm/runtime/globals.hpp Thu Jul 21 11:25:07 2011 -0700 @@ -1205,6 +1205,9 @@ product(bool, UseUnalignedLoadStores, false, \ "Use SSE2 MOVDQU instruction for Arraycopy") \ \ + product(bool, UseCBCond, false, \ + "Use compare and branch instruction on SPARC") \ + \ product(intx, FieldsAllocationStyle, 1, \ "0 - type based with oops first, 1 - with oops last, " \ "2 - oops in super and sub classes are together") \
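End to end, the new flag follows the usual capability-gating pattern: the Solaris layer maps the AV_SPARC_CBCOND bit from getisax(2) into cbcond_instructions_m, VM_Version exposes that bit as has_cbcond(), and UseCBCond defaults to true only where the hardware reports the instruction, warning and clearing the flag otherwise. Below is a self-contained sketch of that shape; the FLAG_IS_DEFAULT/FLAG_SET_DEFAULT definitions here are simplified mocks for illustration, not HotSpot's real flag machinery.

    #include <cstdio>

    // Simplified stand-ins for the flag machinery -- illustration only.
    static bool UseCBCond             = false;  // product flag default, per globals.hpp
    static bool UseCBCond_set_by_user = false;  // would be set by -XX:+/-UseCBCond parsing
    #define FLAG_IS_DEFAULT(f)     (!f##_set_by_user)
    #define FLAG_SET_DEFAULT(f, v) ((f) = (v))

    enum Feature_Flag { cbcond_instructions = 16 };
    static const int cbcond_instructions_m = 1 << cbcond_instructions;
    static int _features = 0;

    static bool has_cbcond() { return (_features & cbcond_instructions_m) != 0; }

    // Mirrors the initialize() logic in vm_version_sparc.cpp above: default
    // the flag on where the hardware has cbcond, never leave it on elsewhere.
    static void initialize_cbcond_flag() {
      if (has_cbcond()) {
        if (FLAG_IS_DEFAULT(UseCBCond)) {
          FLAG_SET_DEFAULT(UseCBCond, true);
        }
      } else if (UseCBCond) {
        std::fprintf(stderr, "warning: CBCOND instruction is not available on this CPU\n");
        FLAG_SET_DEFAULT(UseCBCond, false);
      }
    }

    int main() {
      _features |= cbcond_instructions_m;  // pretend getisax(2) reported AV_SPARC_CBCOND
      initialize_cbcond_flag();
      std::printf("UseCBCond = %d\n", (int)UseCBCond);
      return 0;
    }

Routing code generation through a product flag instead of the raw feature bit keeps -XX:-UseCBCond available as an escape hatch on T4 hardware, which is handy when isolating codegen problems.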