comparison src/cpu/sparc/vm/assembler_sparc.cpp @ 342:37f87013dfd8

6711316: Open source the Garbage-First garbage collector
Summary: First mercurial integration of the code for the Garbage-First garbage collector.
Reviewed-by: apetrusenko, iveresov, jmasa, sgoldman, tonyp, ysr
author ysr
date Thu, 05 Jun 2008 15:57:56 -0700
parents b130b98db9cf
children 6aae2f9d0294
comparison: 189:0b27f3512f9e vs 342:37f87013dfd8
126 return r; 126 return r;
127 } 127 }
128 128
129 int AbstractAssembler::code_fill_byte() { 129 int AbstractAssembler::code_fill_byte() {
130 return 0x00; // illegal instruction 0x00000000 130 return 0x00; // illegal instruction 0x00000000
131 }
132
133 Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) {
134 switch (in) {
135 case rc_z: return equal;
136 case rc_lez: return lessEqual;
137 case rc_lz: return less;
138 case rc_nz: return notEqual;
139 case rc_gz: return greater;
140 case rc_gez: return greaterEqual;
141 default:
142 ShouldNotReachHere();
143 }
144 return equal;
131 } 145 }
132 146
133 // Generate a bunch 'o stuff (including v9's 147 // Generate a bunch 'o stuff (including v9's
134 #ifndef PRODUCT 148 #ifndef PRODUCT
135 void Assembler::test_v9() { 149 void Assembler::test_v9() {
1211 1225
1212 st_ptr(oop_result, vm_result_addr); 1226 st_ptr(oop_result, vm_result_addr);
1213 } 1227 }
1214 1228
1215 1229
1216 void MacroAssembler::store_check(Register tmp, Register obj) { 1230 void MacroAssembler::card_table_write(jbyte* byte_map_base,
1217 // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.) 1231 Register tmp, Register obj) {
1218
1219 /* $$$ This stuff needs to go into one of the BarrierSet generator
1220 functions. (The particular barrier sets will have to be friends of
1221 MacroAssembler, I guess.) */
1222 BarrierSet* bs = Universe::heap()->barrier_set();
1223 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
1224 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
1225 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
1226 #ifdef _LP64 1232 #ifdef _LP64
1227 srlx(obj, CardTableModRefBS::card_shift, obj); 1233 srlx(obj, CardTableModRefBS::card_shift, obj);
1228 #else 1234 #else
1229 srl(obj, CardTableModRefBS::card_shift, obj); 1235 srl(obj, CardTableModRefBS::card_shift, obj);
1230 #endif 1236 #endif
1231 assert( tmp != obj, "need separate temp reg"); 1237 assert( tmp != obj, "need separate temp reg");
1232 Address rs(tmp, (address)ct->byte_map_base); 1238 Address rs(tmp, (address)byte_map_base);
1233 load_address(rs); 1239 load_address(rs);
1234 stb(G0, rs.base(), obj); 1240 stb(G0, rs.base(), obj);
1235 }
1236
1237 void MacroAssembler::store_check(Register tmp, Register obj, Register offset) {
1238 store_check(tmp, obj);
1239 } 1241 }
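The refactored card_table_write boils down to one shift and one byte store. As a hedged reference, the same arithmetic in host C++; the card size and the pre-biased base pointer are assumptions in the spirit of CardTableModRefBS, not values taken from this changeset:

```cpp
#include <cstdint>

// Sketch of the srlx/stb pair above; both constants are assumptions.
const int kCardShift = 9;          // assumed 512-byte cards
extern int8_t* byte_map_base;      // assumed pre-biased card-table base

inline void card_table_write_sketch(const void* addr) {
  // srlx obj, card_shift, obj: compute the card index
  uintptr_t card = reinterpret_cast<uintptr_t>(addr) >> kCardShift;
  // stb G0, [base + card]: a zero byte marks the card dirty
  byte_map_base[card] = 0;
}
```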
1240 1242
1241 // %%% Note: The following six instructions have been moved, 1243 // %%% Note: The following six instructions have been moved,
1242 // unchanged, from assembler_sparc.inline.hpp. 1244 // unchanged, from assembler_sparc.inline.hpp.
1243 // They will be refactored at a later date. 1245 // They will be refactored at a later date.
1646 // plausibility check for oops 1648 // plausibility check for oops
1647 if (!VerifyOops) return; 1649 if (!VerifyOops) return;
1648 1650
1649 if (reg == G0) return; // always NULL, which is always an oop 1651 if (reg == G0) return; // always NULL, which is always an oop
1650 1652
1651 char buffer[16]; 1653 char buffer[64];
1654 #ifdef COMPILER1
1655 if (CommentedAssembly) {
1656 snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
1657 block_comment(buffer);
1658 }
1659 #endif
1660
1661 int len = strlen(file) + strlen(msg) + 1 + 4;
1652 sprintf(buffer, "%d", line); 1662 sprintf(buffer, "%d", line);
1653 int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer); 1663 len += strlen(buffer);
1664 sprintf(buffer, " at offset %d ", offset());
1665 len += strlen(buffer);
1654 char * real_msg = new char[len]; 1666 char * real_msg = new char[len];
1655 sprintf(real_msg, "%s (%s:%d)", msg, file, line); 1667 sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line);
1656 1668
1657 // Call indirectly to solve generation ordering problem 1669 // Call indirectly to solve generation ordering problem
1658 Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address()); 1670 Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address());
1659 1671
1660 // Make some space on stack above the current register window. 1672 // Make some space on stack above the current register window.
2040 bpr( rc_nz, a, p, s1, L ); 2052 bpr( rc_nz, a, p, s1, L );
2041 #else 2053 #else
2042 tst(s1); 2054 tst(s1);
2043 br ( notZero, a, p, L ); 2055 br ( notZero, a, p, L );
2044 #endif 2056 #endif
2057 }
2058
2059 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
2060 Register s1, address d,
2061 relocInfo::relocType rt ) {
2062 if (VM_Version::v9_instructions_work()) {
2063 bpr(rc, a, p, s1, d, rt);
2064 } else {
2065 tst(s1);
2066 br(reg_cond_to_cc_cond(rc), a, p, d, rt);
2067 }
2068 }
2069
2070 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
2071 Register s1, Label& L ) {
2072 if (VM_Version::v9_instructions_work()) {
2073 bpr(rc, a, p, s1, L);
2074 } else {
2075 tst(s1);
2076 br(reg_cond_to_cc_cond(rc), a, p, L);
2077 }
2045 } 2078 }
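To show how these new overloads are meant to be used: a caller asks for a branch on a register condition and fills the SPARC delay slot, and the macro assembler picks the V9 bpr encoding or the V8 tst-plus-branch expansion. This call site is purely hypothetical; the label name and choice of G3 are illustrative:

```cpp
// Hypothetical usage; 'masm', the label, and G3 are assumptions.
Label L_is_zero;
masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
                    G3, L_is_zero);
masm.delayed()->nop();   // the delay slot must always be filled on SPARC
// ... fall-through path for G3 != 0 ...
masm.bind(L_is_zero);
```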
2046 2079
2047 2080
2048 // instruction sequences factored across compiler & interpreter 2081 // instruction sequences factored across compiler & interpreter
2049 2082
3224 // make sure arguments make sense 3257 // make sure arguments make sense
3225 assert_different_registers(obj, var_size_in_bytes, t1, t2); 3258 assert_different_registers(obj, var_size_in_bytes, t1, t2);
3226 assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size"); 3259 assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
3227 assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); 3260 assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
3228 3261
3229 // get eden boundaries 3262 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
3230 // note: we need both top & top_addr! 3263 // No allocation in the shared eden.
3231 const Register top_addr = t1; 3264 br(Assembler::always, false, Assembler::pt, slow_case);
3232 const Register end = t2; 3265 delayed()->nop();
3233 3266 } else {
3234 CollectedHeap* ch = Universe::heap(); 3267 // get eden boundaries
3235 set((intx)ch->top_addr(), top_addr); 3268 // note: we need both top & top_addr!
3236 intx delta = (intx)ch->end_addr() - (intx)ch->top_addr(); 3269 const Register top_addr = t1;
3237 ld_ptr(top_addr, delta, end); 3270 const Register end = t2;
3238 ld_ptr(top_addr, 0, obj); 3271
3239 3272 CollectedHeap* ch = Universe::heap();
3240 // try to allocate 3273 set((intx)ch->top_addr(), top_addr);
3241 Label retry; 3274 intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
3242 bind(retry); 3275 ld_ptr(top_addr, delta, end);
3276 ld_ptr(top_addr, 0, obj);
3277
3278 // try to allocate
3279 Label retry;
3280 bind(retry);
3243 #ifdef ASSERT 3281 #ifdef ASSERT
3244 // make sure eden top is properly aligned 3282 // make sure eden top is properly aligned
3245 { 3283 {
3246 Label L; 3284 Label L;
3247 btst(MinObjAlignmentInBytesMask, obj); 3285 btst(MinObjAlignmentInBytesMask, obj);
3248 br(Assembler::zero, false, Assembler::pt, L); 3286 br(Assembler::zero, false, Assembler::pt, L);
3249 delayed()->nop(); 3287 delayed()->nop();
3250 stop("eden top is not properly aligned"); 3288 stop("eden top is not properly aligned");
3251 bind(L); 3289 bind(L);
3252 } 3290 }
3253 #endif // ASSERT 3291 #endif // ASSERT
3254 const Register free = end; 3292 const Register free = end;
3255 sub(end, obj, free); // compute amount of free space 3293 sub(end, obj, free); // compute amount of free space
3256 if (var_size_in_bytes->is_valid()) { 3294 if (var_size_in_bytes->is_valid()) {
3257 // size is unknown at compile time 3295 // size is unknown at compile time
3258 cmp(free, var_size_in_bytes); 3296 cmp(free, var_size_in_bytes);
3259 br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case 3297 br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
3260 delayed()->add(obj, var_size_in_bytes, end); 3298 delayed()->add(obj, var_size_in_bytes, end);
3261 } else { 3299 } else {
3262 // size is known at compile time 3300 // size is known at compile time
3263 cmp(free, con_size_in_bytes); 3301 cmp(free, con_size_in_bytes);
3264 br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case 3302 br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
3265 delayed()->add(obj, con_size_in_bytes, end); 3303 delayed()->add(obj, con_size_in_bytes, end);
3266 } 3304 }
3267 // Compare obj with the value at top_addr; if still equal, swap the value of 3305 // Compare obj with the value at top_addr; if still equal, swap the value of
3268 // end with the value at top_addr. If not equal, read the value at top_addr 3306 // end with the value at top_addr. If not equal, read the value at top_addr
3269 // into end. 3307 // into end.
3270 casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); 3308 casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
3271 // if someone beat us on the allocation, try again, otherwise continue 3309 // if someone beat us on the allocation, try again, otherwise continue
3272 cmp(obj, end); 3310 cmp(obj, end);
3273 brx(Assembler::notEqual, false, Assembler::pn, retry); 3311 brx(Assembler::notEqual, false, Assembler::pn, retry);
3274 delayed()->mov(end, obj); // nop if successful since obj == end 3312 delayed()->mov(end, obj); // nop if successful since obj == end
3275 3313
3276 #ifdef ASSERT 3314 #ifdef ASSERT
3277 // make sure eden top is properly aligned 3315 // make sure eden top is properly aligned
3278 { 3316 {
3279 Label L; 3317 Label L;
3280 const Register top_addr = t1; 3318 const Register top_addr = t1;
3281 3319
3282 set((intx)ch->top_addr(), top_addr); 3320 set((intx)ch->top_addr(), top_addr);
3283 ld_ptr(top_addr, 0, top_addr); 3321 ld_ptr(top_addr, 0, top_addr);
3284 btst(MinObjAlignmentInBytesMask, top_addr); 3322 btst(MinObjAlignmentInBytesMask, top_addr);
3285 br(Assembler::zero, false, Assembler::pt, L); 3323 br(Assembler::zero, false, Assembler::pt, L);
3286 delayed()->nop(); 3324 delayed()->nop();
3287 stop("eden top is not properly aligned"); 3325 stop("eden top is not properly aligned");
3288 bind(L); 3326 bind(L);
3289 } 3327 }
3290 #endif // ASSERT 3328 #endif // ASSERT
3329 }
3291 } 3330 }
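The retry loop above is a classic CAS bump-pointer allocation. A minimal C++ sketch of the same protocol, using std::atomic in place of casx_under_lock; the 'Eden' struct stands in for the real CollectedHeap top/end fields and is an assumption:

```cpp
#include <atomic>
#include <cstddef>

// Sketch of the eden fast path; not the real heap types.
struct Eden {
  std::atomic<char*> top;   // *top_addr
  char*              end;   // *end_addr
};

// Returns the object start, or nullptr to signal the slow case.
char* eden_allocate_sketch(Eden& eden, std::size_t size_in_bytes) {
  char* obj = eden.top.load();
  for (;;) {                                           // 'retry'
    if (eden.end - obj < static_cast<std::ptrdiff_t>(size_in_bytes))
      return nullptr;                                  // go to slow_case
    char* new_top = obj + size_in_bytes;
    // casx: install new_top only if top still equals obj; on failure,
    // 'obj' is reloaded with the current top and we retry.
    if (eden.top.compare_exchange_weak(obj, new_top))
      return obj;                                      // obj == old top
  }
}
```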
3292 3331
3293 3332
3294 void MacroAssembler::tlab_allocate( 3333 void MacroAssembler::tlab_allocate(
3295 Register obj, // result: pointer to object after successful allocation 3334 Register obj, // result: pointer to object after successful allocation
3535 set((-i*offset)+STACK_BIAS, Rscratch); 3574 set((-i*offset)+STACK_BIAS, Rscratch);
3536 st(G0, Rtsp, Rscratch); 3575 st(G0, Rtsp, Rscratch);
3537 } 3576 }
3538 } 3577 }
3539 3578
3579 ///////////////////////////////////////////////////////////////////////////////////
3580 #ifndef SERIALGC
3581
3582 static uint num_stores = 0;
3583 static uint num_null_pre_stores = 0;
3584
3585 static void count_null_pre_vals(void* pre_val) {
3586 num_stores++;
3587 if (pre_val == NULL) num_null_pre_stores++;
3588 if ((num_stores % 1000000) == 0) {
3589 tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.",
3590 num_stores, num_null_pre_stores,
3591 100.0*(float)num_null_pre_stores/(float)num_stores);
3592 }
3593 }
3594
3595 static address satb_log_enqueue_with_frame = 0;
3596 static u_char* satb_log_enqueue_with_frame_end = 0;
3597
3598 static address satb_log_enqueue_frameless = 0;
3599 static u_char* satb_log_enqueue_frameless_end = 0;
3600
3601 static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Rough upper bound on stub size, in bytes.
3602
3603 // Calls to this don't currently work; making them work would
3604 // take a fair amount of effort.
3605 static void check_index(int ind) {
3606 assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0),
3607 "Invariants.")
3608 }
3609
3610 static void generate_satb_log_enqueue(bool with_frame) {
3611 BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
3612 CodeBuffer buf(bb->instructions_begin(), bb->instructions_size());
3613 MacroAssembler masm(&buf);
3614 address start = masm.pc();
3615 Register pre_val;
3616
3617 Label refill, restart;
3618 if (with_frame) {
3619 masm.save_frame(0);
3620 pre_val = I0; // Was O0 before the save.
3621 } else {
3622 pre_val = O0;
3623 }
3624 int satb_q_index_byte_offset =
3625 in_bytes(JavaThread::satb_mark_queue_offset() +
3626 PtrQueue::byte_offset_of_index());
3627 int satb_q_buf_byte_offset =
3628 in_bytes(JavaThread::satb_mark_queue_offset() +
3629 PtrQueue::byte_offset_of_buf());
3630 assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
3631 in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
3632 "check sizes in assembly below");
3633
3634 masm.bind(restart);
3635 masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
3636
3637 masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
3638 // If the branch is taken, no harm in executing this in the delay slot.
3639 masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
3640 masm.sub(L0, oopSize, L0);
3641
3642 masm.st_ptr(pre_val, L1, L0); // [_buf + index] := pre_val
3643 if (!with_frame) {
3644 // Use return-from-leaf
3645 masm.retl();
3646 masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
3647 } else {
3648 // Not delayed.
3649 masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset);
3650 }
3651 if (with_frame) {
3652 masm.ret();
3653 masm.delayed()->restore();
3654 }
3655 masm.bind(refill);
3656
3657 address handle_zero =
3658 CAST_FROM_FN_PTR(address,
3659 &SATBMarkQueueSet::handle_zero_index_for_thread);
3660 // This should be rare enough that we can afford to save all the
3661 // scratch registers that the calling context might be using.
3662 masm.mov(G1_scratch, L0);
3663 masm.mov(G3_scratch, L1);
3664 masm.mov(G4, L2);
3665 // We need the value of O0 above (for the write into the buffer), so we
3666 // save and restore it.
3667 masm.mov(O0, L3);
3668 // Since the call will overwrite O7, we save and restore that, as well.
3669 masm.mov(O7, L4);
3670 masm.call_VM_leaf(L5, handle_zero, G2_thread);
3671 masm.mov(L0, G1_scratch);
3672 masm.mov(L1, G3_scratch);
3673 masm.mov(L2, G4);
3674 masm.mov(L3, O0);
3675 masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
3676 masm.delayed()->mov(L4, O7);
3677
3678 if (with_frame) {
3679 satb_log_enqueue_with_frame = start;
3680 satb_log_enqueue_with_frame_end = masm.pc();
3681 } else {
3682 satb_log_enqueue_frameless = start;
3683 satb_log_enqueue_frameless_end = masm.pc();
3684 }
3685 }
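What the generated stub implements is the standard per-thread log fast path: decrement the queue index, store the previous value into the buffer, and call into the runtime only when the buffer is full. A hedged C++ rendering of that logic; the queue layout here is an assumption modeled on PtrQueue (the index counts down in bytes):

```cpp
#include <cstddef>

// Sketch of the SATB enqueue fast path emitted above.
struct SatbQueue {
  void**      buf;     // _buf
  std::size_t index;   // byte offset of next free slot; 0 means full
};

// Slow path ('refill'): assumed to flush the buffer and reset index.
void handle_zero_index(SatbQueue& q);

void satb_enqueue_sketch(SatbQueue& q, void* pre_val) {
  if (q.index == 0)                 // br_on_reg_cond(rc_z, ..., refill)
    handle_zero_index(q);
  q.index -= sizeof(void*);         // sub(L0, oopSize, L0)
  // st_ptr(pre_val, _buf, index): the index is byte-scaled
  *reinterpret_cast<void**>(
      reinterpret_cast<char*>(q.buf) + q.index) = pre_val;
}
```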
3686
3687 static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) {
3688 if (with_frame) {
3689 if (satb_log_enqueue_with_frame == 0) {
3690 generate_satb_log_enqueue(with_frame);
3691 assert(satb_log_enqueue_with_frame != 0, "postcondition.");
3692 if (G1SATBPrintStubs) {
3693 tty->print_cr("Generated with-frame satb enqueue:");
3694 Disassembler::decode((u_char*)satb_log_enqueue_with_frame,
3695 satb_log_enqueue_with_frame_end,
3696 tty);
3697 }
3698 }
3699 } else {
3700 if (satb_log_enqueue_frameless == 0) {
3701 generate_satb_log_enqueue(with_frame);
3702 assert(satb_log_enqueue_frameless != 0, "postcondition.");
3703 if (G1SATBPrintStubs) {
3704 tty->print_cr("Generated frameless satb enqueue:");
3705 Disassembler::decode((u_char*)satb_log_enqueue_frameless,
3706 satb_log_enqueue_frameless_end,
3707 tty);
3708 }
3709 }
3710 }
3711 }
3712
3713 void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) {
3714 assert(offset == 0 || index == noreg, "choose one");
3715
3716 if (G1DisablePreBarrier) return;
3717 // satb_log_barrier(tmp, obj, offset, preserve_o_regs);
3718 Label filtered;
3719 // satb_log_barrier_work0(tmp, filtered);
3720 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
3721 ld(G2,
3722 in_bytes(JavaThread::satb_mark_queue_offset() +
3723 PtrQueue::byte_offset_of_active()),
3724 tmp);
3725 } else {
3726 guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
3727 "Assumption");
3728 ldsb(G2,
3729 in_bytes(JavaThread::satb_mark_queue_offset() +
3730 PtrQueue::byte_offset_of_active()),
3731 tmp);
3732 }
3733 // Is marking active? If not, skip to 'filtered'.
3734 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
3735 delayed()->nop();
3736
3737 // satb_log_barrier_work1(tmp, offset);
3738 if (index == noreg) {
3739 if (Assembler::is_simm13(offset)) {
3740 ld_ptr(obj, offset, tmp);
3741 } else {
3742 set(offset, tmp);
3743 ld_ptr(obj, tmp, tmp);
3744 }
3745 } else {
3746 ld_ptr(obj, index, tmp);
3747 }
3748
3749 // satb_log_barrier_work2(obj, tmp, offset);
3750
3751 // satb_log_barrier_work3(tmp, filtered, preserve_o_regs);
3752
3753 const Register pre_val = tmp;
3754
3755 if (G1SATBBarrierPrintNullPreVals) {
3756 save_frame(0);
3757 mov(pre_val, O0);
3758 // Save G-regs that target may use.
3759 mov(G1, L1);
3760 mov(G2, L2);
3761 mov(G3, L3);
3762 mov(G4, L4);
3763 mov(G5, L5);
3764 call(CAST_FROM_FN_PTR(address, &count_null_pre_vals));
3765 delayed()->nop();
3766 // Restore G-regs that target may have used.
3767 mov(L1, G1);
3768 mov(L2, G2);
3769 mov(L3, G3);
3770 mov(L4, G4);
3771 mov(L5, G5);
3772 restore(G0, G0, G0);
3773 }
3774
3775 // Is the previous value NULL? If so, skip to 'filtered'.
3776 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
3777 delayed()->nop();
3778
3779 // OK, it's not filtered, so we'll need to call enqueue. In the normal
3780 // case, pre_val will be a scratch G-reg, but there's some cases in which
3781 // it's an O-reg. In the first case, do a normal call. In the latter,
3782 // do a save here and call the frameless version.
3783
3784 guarantee(pre_val->is_global() || pre_val->is_out(),
3785 "Or we need to think harder.");
3786 if (pre_val->is_global() && !preserve_o_regs) {
3787 generate_satb_log_enqueue_if_necessary(true); // with frame.
3788 call(satb_log_enqueue_with_frame);
3789 delayed()->mov(pre_val, O0);
3790 } else {
3791 generate_satb_log_enqueue_if_necessary(false); // frameless.
3792 save_frame(0);
3793 call(satb_log_enqueue_frameless);
3794 delayed()->mov(pre_val->after_save(), O0);
3795 restore();
3796 }
3797
3798 bind(filtered);
3799 }
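Stripped of the register shuffling and instrumentation, the pre-barrier's contract is: if concurrent marking is active and the field's previous value is non-NULL, log that previous value. A hedged C++ restatement; the thread-state type and the enqueue helper are assumptions (see the SATB sketch above):

```cpp
// Sketch of the pre-barrier's overall contract.
struct SatbQueue;
void satb_enqueue_sketch(SatbQueue& q, void* pre_val);

struct G1ThreadState {          // assumed thread-local fields
  bool       satb_active;      // the PtrQueue 'active' flag
  SatbQueue* satb_queue;
};

void g1_pre_barrier_sketch(void** field, G1ThreadState& t) {
  if (!t.satb_active) return;      // filtered: marking not active
  void* pre_val = *field;          // ld_ptr(obj, offset/index, tmp)
  if (pre_val == nullptr) return;  // filtered: NULL pre-value
  satb_enqueue_sketch(*t.satb_queue, pre_val);  // log the old value
}
```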
3800
3801 static jint num_ct_writes = 0;
3802 static jint num_ct_writes_filtered_in_hr = 0;
3803 static jint num_ct_writes_filtered_null = 0;
3804 static jint num_ct_writes_filtered_pop = 0;
3805 static G1CollectedHeap* g1 = NULL;
3806
3807 static Thread* count_ct_writes(void* filter_val, void* new_val) {
3808 Atomic::inc(&num_ct_writes);
3809 if (filter_val == NULL) {
3810 Atomic::inc(&num_ct_writes_filtered_in_hr);
3811 } else if (new_val == NULL) {
3812 Atomic::inc(&num_ct_writes_filtered_null);
3813 } else {
3814 if (g1 == NULL) {
3815 g1 = G1CollectedHeap::heap();
3816 }
3817 if ((HeapWord*)new_val < g1->popular_object_boundary()) {
3818 Atomic::inc(&num_ct_writes_filtered_pop);
3819 }
3820 }
3821 if ((num_ct_writes % 1000000) == 0) {
3822 jint num_ct_writes_filtered =
3823 num_ct_writes_filtered_in_hr +
3824 num_ct_writes_filtered_null +
3825 num_ct_writes_filtered_pop;
3826
3827 tty->print_cr("%d potential CT writes: %5.2f%% filtered\n"
3828 " (%5.2f%% intra-HR, %5.2f%% null, %5.2f%% popular).",
3829 num_ct_writes,
3830 100.0*(float)num_ct_writes_filtered/(float)num_ct_writes,
3831 100.0*(float)num_ct_writes_filtered_in_hr/
3832 (float)num_ct_writes,
3833 100.0*(float)num_ct_writes_filtered_null/
3834 (float)num_ct_writes,
3835 100.0*(float)num_ct_writes_filtered_pop/
3836 (float)num_ct_writes);
3837 }
3838 return Thread::current();
3839 }
3840
3841 static address dirty_card_log_enqueue = 0;
3842 static u_char* dirty_card_log_enqueue_end = 0;
3843
3844 // This code is allowed to assume that O0 contains the object address.
3845 static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
3846 BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
3847 CodeBuffer buf(bb->instructions_begin(), bb->instructions_size());
3848 MacroAssembler masm(&buf);
3849 address start = masm.pc();
3850
3851 Label not_already_dirty, restart, refill;
3852
3853 #ifdef _LP64
3854 masm.srlx(O0, CardTableModRefBS::card_shift, O0);
3855 #else
3856 masm.srl(O0, CardTableModRefBS::card_shift, O0);
3857 #endif
3858 Address rs(O1, (address)byte_map_base);
3859 masm.load_address(rs); // O1 := <card table base>
3860 masm.ldub(O0, O1, O2); // O2 := [O0 + O1]
3861
3862 masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
3863 O2, not_already_dirty);
3864 // Get O1 + O2 into a reg by itself -- useful in the take-the-branch
3865 // case, harmless if not.
3866 masm.delayed()->add(O0, O1, O3);
3867
3868 // We didn't take the branch, so we're already dirty: return.
3869 // Use return-from-leaf
3870 masm.retl();
3871 masm.delayed()->nop();
3872
3873 // Not dirty.
3874 masm.bind(not_already_dirty);
3875 // First, dirty it.
3876 masm.stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty).
3877 int dirty_card_q_index_byte_offset =
3878 in_bytes(JavaThread::dirty_card_queue_offset() +
3879 PtrQueue::byte_offset_of_index());
3880 int dirty_card_q_buf_byte_offset =
3881 in_bytes(JavaThread::dirty_card_queue_offset() +
3882 PtrQueue::byte_offset_of_buf());
3883 masm.bind(restart);
3884 masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
3885
3886 masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
3887 L0, refill);
3888 // If the branch is taken, no harm in executing this in the delay slot.
3889 masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
3890 masm.sub(L0, oopSize, L0);
3891
3892 masm.st_ptr(O3, L1, L0); // [_buf + index] := O3 (the card address)
3893 // Use return-from-leaf
3894 masm.retl();
3895 masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
3896
3897 masm.bind(refill);
3898 address handle_zero =
3899 CAST_FROM_FN_PTR(address,
3900 &DirtyCardQueueSet::handle_zero_index_for_thread);
3901 // This should be rare enough that we can afford to save all the
3902 // scratch registers that the calling context might be using.
3903 masm.mov(G1_scratch, L3);
3904 masm.mov(G3_scratch, L5);
3905 // We need the value of O3 above (for the write into the buffer), so we
3906 // save and restore it.
3907 masm.mov(O3, L6);
3908 // Since the call will overwrite O7, we save and restore that, as well.
3909 masm.mov(O7, L4);
3910
3911 masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
3912 masm.mov(L3, G1_scratch);
3913 masm.mov(L5, G3_scratch);
3914 masm.mov(L6, O3);
3915 masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
3916 masm.delayed()->mov(L4, O7);
3917
3918 dirty_card_log_enqueue = start;
3919 dirty_card_log_enqueue_end = masm.pc();
3920 // XXX Should have a guarantee here about not going off the end!
3921 // Does it already do so? Do an experiment...
3922 }
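Logically, this stub is: compute the card, return if it is already dirty, otherwise dirty it and log the card address for the refinement threads. A hedged C++ sketch with assumed names and constants (same caveats as the SATB sketch above):

```cpp
#include <cstddef>
#include <cstdint>

// Sketch of the dirty-card enqueue stub; all names are assumptions.
const int kCardShift = 9;             // assumed 512-byte cards
extern int8_t* byte_map_base;         // assumed biased card-table base

struct DirtyCardQueue { void** buf; std::size_t index; };
void handle_zero_index(DirtyCardQueue& q);   // 'refill' slow path

void g1_mark_card_sketch(const void* store_addr, DirtyCardQueue& q) {
  int8_t* card = byte_map_base +
      (reinterpret_cast<uintptr_t>(store_addr) >> kCardShift);
  if (*card == 0) return;             // already dirty: plain retl
  *card = 0;                          // stb G0, [card]: dirty it now
  if (q.index == 0) handle_zero_index(q);
  q.index -= sizeof(void*);
  *reinterpret_cast<void**>(          // log the card address
      reinterpret_cast<char*>(q.buf) + q.index) = card;
}
```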
3923
3924 static inline void
3925 generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
3926 if (dirty_card_log_enqueue == 0) {
3927 generate_dirty_card_log_enqueue(byte_map_base);
3928 assert(dirty_card_log_enqueue != 0, "postcondition.");
3929 if (G1SATBPrintStubs) {
3930 tty->print_cr("Generated dirty_card enqueue:");
3931 Disassembler::decode((u_char*)dirty_card_log_enqueue,
3932 dirty_card_log_enqueue_end,
3933 tty);
3934 }
3935 }
3936 }
3937
3938
3939 void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
3940
3941 Label filtered;
3942 MacroAssembler* post_filter_masm = this;
3943
3944 if (new_val == G0) return;
3945 if (G1DisablePostBarrier) return;
3946
3947 G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
3948 assert(bs->kind() == BarrierSet::G1SATBCT ||
3949 bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
3950 if (G1RSBarrierRegionFilter) {
3951 xor3(store_addr, new_val, tmp);
3952 #ifdef _LP64
3953 srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
3954 #else
3955 srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
3956 #endif
3957 if (G1PrintCTFilterStats) {
3958 guarantee(tmp->is_global(), "Or stats won't work...");
3959 // This is a sleazy hack: I'm temporarily hijacking G2, which I
3960 // promise to restore.
3961 mov(new_val, G2);
3962 save_frame(0);
3963 mov(tmp, O0);
3964 mov(G2, O1);
3965 // Save G-regs that target may use.
3966 mov(G1, L1);
3967 mov(G2, L2);
3968 mov(G3, L3);
3969 mov(G4, L4);
3970 mov(G5, L5);
3971 call(CAST_FROM_FN_PTR(address, &count_ct_writes));
3972 delayed()->nop();
3973 mov(O0, G2);
3974 // Restore G-regs that target may have used.
3975 mov(L1, G1);
3976 mov(L3, G3);
3977 mov(L4, G4);
3978 mov(L5, G5);
3979 restore(G0, G0, G0);
3980 }
3981 // XXX Should this branch be predicted taken or not? Does it matter?
3982 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
3983 delayed()->nop();
3984 }
3985
3986 // Now we decide how to generate the card table write. If we're
3987 // enqueueing, we call out to a generated function. Otherwise, we do it
3988 // inline here.
3989
3990 if (G1RSBarrierUseQueue) {
3991 // If the "store_addr" register is an "in" or "local" register, move it to
3992 // a scratch reg so we can pass it as an argument.
3993 bool use_scr = !(store_addr->is_global() || store_addr->is_out());
3994 // Pick a scratch register different from "tmp".
3995 Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
3996 // Make sure we use up the delay slot!
3997 if (use_scr) {
3998 post_filter_masm->mov(store_addr, scr);
3999 } else {
4000 post_filter_masm->nop();
4001 }
4002 generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
4003 save_frame(0);
4004 call(dirty_card_log_enqueue);
4005 if (use_scr) {
4006 delayed()->mov(scr, O0);
4007 } else {
4008 delayed()->mov(store_addr->after_save(), O0);
4009 }
4010 restore();
4011
4012 } else {
4013
4014 #ifdef _LP64
4015 post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr);
4016 #else
4017 post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr);
4018 #endif
4019 assert( tmp != store_addr, "need separate temp reg");
4020 Address rs(tmp, (address)bs->byte_map_base);
4021 load_address(rs);
4022 stb(G0, rs.base(), store_addr);
4023 }
4024
4025 bind(filtered);
4026
4027 }
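The region filter at the top of the post-barrier is the part worth internalizing: a store is only interesting to G1's remembered sets if it crosses a heap region, and XOR-then-shift detects that in two instructions. A hedged arithmetic sketch; the region size is an assumption, not a value from this changeset:

```cpp
#include <cstdint>

// Sketch of the G1RSBarrierRegionFilter test.
const int kLogHRGrainBytes = 20;   // hypothetical 1 MB heap regions

bool crosses_region(const void* store_addr, const void* new_val) {
  // xor3 + srlx: identical high bits => same region => filter out
  uintptr_t x = reinterpret_cast<uintptr_t>(store_addr) ^
                reinterpret_cast<uintptr_t>(new_val);
  return (x >> kLogHRGrainBytes) != 0;
}
// Only when crosses_region(...) is true does the barrier go on to
// dirty and enqueue the card (see g1_mark_card_sketch above).
```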
4028
4029 #endif // SERIALGC
4030 ///////////////////////////////////////////////////////////////////////////////////
4031
4032 void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
4033 // If we're writing constant NULL, we can skip the write barrier.
4034 if (new_val == G0) return;
4035 CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set();
4036 assert(bs->kind() == BarrierSet::CardTableModRef ||
4037 bs->kind() == BarrierSet::CardTableExtension, "wrong barrier");
4038 card_table_write(bs->byte_map_base, tmp, store_addr);
4039 }
4040
3540 void MacroAssembler::load_klass(Register s, Register d) { 4041 void MacroAssembler::load_klass(Register s, Register d) {
3541 // The number of bytes in this code is used by 4042 // The number of bytes in this code is used by
3542 // MachCallDynamicJavaNode::ret_addr_offset() 4043 // MachCallDynamicJavaNode::ret_addr_offset()
3543 // if this changes, change that. 4044 // if this changes, change that.
3544 if (UseCompressedOops) { 4045 if (UseCompressedOops) {