Mercurial > hg > graal-compiler
comparison src/cpu/sparc/vm/assembler_sparc.cpp @ 342:37f87013dfd8
6711316: Open source the Garbage-First garbage collector
Summary: First mercurial integration of the code for the Garbage-First garbage collector.
Reviewed-by: apetrusenko, iveresov, jmasa, sgoldman, tonyp, ysr
author | ysr |
---|---|
date | Thu, 05 Jun 2008 15:57:56 -0700 |
parents | b130b98db9cf |
children | 6aae2f9d0294 |
comparison
equal
deleted
inserted
replaced
189:0b27f3512f9e | 342:37f87013dfd8 |
---|---|
126 return r; | 126 return r; |
127 } | 127 } |
128 | 128 |
129 int AbstractAssembler::code_fill_byte() { | 129 int AbstractAssembler::code_fill_byte() { |
130 return 0x00; // illegal instruction 0x00000000 | 130 return 0x00; // illegal instruction 0x00000000 |
131 } | |
132 | |
133 Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) { | |
134 switch (in) { | |
135 case rc_z: return equal; | |
136 case rc_lez: return lessEqual; | |
137 case rc_lz: return less; | |
138 case rc_nz: return notEqual; | |
139 case rc_gz: return greater; | |
140 case rc_gez: return greaterEqual; | |
141 default: | |
142 ShouldNotReachHere(); | |
143 } | |
144 return equal; | |
131 } | 145 } |
132 | 146 |
133 // Generate a bunch 'o stuff (including v9's | 147 // Generate a bunch 'o stuff (including v9's |
134 #ifndef PRODUCT | 148 #ifndef PRODUCT |
135 void Assembler::test_v9() { | 149 void Assembler::test_v9() { |
1211 | 1225 |
1212 st_ptr(oop_result, vm_result_addr); | 1226 st_ptr(oop_result, vm_result_addr); |
1213 } | 1227 } |
1214 | 1228 |
1215 | 1229 |
1216 void MacroAssembler::store_check(Register tmp, Register obj) { | 1230 void MacroAssembler::card_table_write(jbyte* byte_map_base, |
1217 // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.) | 1231 Register tmp, Register obj) { |
1218 | |
1219 /* $$$ This stuff needs to go into one of the BarrierSet generator | |
1220 functions. (The particular barrier sets will have to be friends of | |
1221 MacroAssembler, I guess.) */ | |
1222 BarrierSet* bs = Universe::heap()->barrier_set(); | |
1223 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); | |
1224 CardTableModRefBS* ct = (CardTableModRefBS*)bs; | |
1225 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); | |
1226 #ifdef _LP64 | 1232 #ifdef _LP64 |
1227 srlx(obj, CardTableModRefBS::card_shift, obj); | 1233 srlx(obj, CardTableModRefBS::card_shift, obj); |
1228 #else | 1234 #else |
1229 srl(obj, CardTableModRefBS::card_shift, obj); | 1235 srl(obj, CardTableModRefBS::card_shift, obj); |
1230 #endif | 1236 #endif |
1231 assert( tmp != obj, "need separate temp reg"); | 1237 assert( tmp != obj, "need separate temp reg"); |
1232 Address rs(tmp, (address)ct->byte_map_base); | 1238 Address rs(tmp, (address)byte_map_base); |
1233 load_address(rs); | 1239 load_address(rs); |
1234 stb(G0, rs.base(), obj); | 1240 stb(G0, rs.base(), obj); |
1235 } | |
1236 | |
1237 void MacroAssembler::store_check(Register tmp, Register obj, Register offset) { | |
1238 store_check(tmp, obj); | |
1239 } | 1241 } |
1240 | 1242 |
1241 // %%% Note: The following six instructions have been moved, | 1243 // %%% Note: The following six instructions have been moved, |
1242 // unchanged, from assembler_sparc.inline.hpp. | 1244 // unchanged, from assembler_sparc.inline.hpp. |
1243 // They will be refactored at a later date. | 1245 // They will be refactored at a later date. |
1646 // plausibility check for oops | 1648 // plausibility check for oops |
1647 if (!VerifyOops) return; | 1649 if (!VerifyOops) return; |
1648 | 1650 |
1649 if (reg == G0) return; // always NULL, which is always an oop | 1651 if (reg == G0) return; // always NULL, which is always an oop |
1650 | 1652 |
1651 char buffer[16]; | 1653 char buffer[64]; |
1654 #ifdef COMPILER1 | |
1655 if (CommentedAssembly) { | |
1656 snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset()); | |
1657 block_comment(buffer); | |
1658 } | |
1659 #endif | |
1660 | |
1661 int len = strlen(file) + strlen(msg) + 1 + 4; | |
1652 sprintf(buffer, "%d", line); | 1662 sprintf(buffer, "%d", line); |
1653 int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer); | 1663 len += strlen(buffer); |
1664 sprintf(buffer, " at offset %d ", offset()); | |
1665 len += strlen(buffer); | |
1654 char * real_msg = new char[len]; | 1666 char * real_msg = new char[len]; |
1655 sprintf(real_msg, "%s (%s:%d)", msg, file, line); | 1667 sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line); |
1656 | 1668 |
1657 // Call indirectly to solve generation ordering problem | 1669 // Call indirectly to solve generation ordering problem |
1658 Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address()); | 1670 Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address()); |
1659 | 1671 |
1660 // Make some space on stack above the current register window. | 1672 // Make some space on stack above the current register window. |
2040 bpr( rc_nz, a, p, s1, L ); | 2052 bpr( rc_nz, a, p, s1, L ); |
2041 #else | 2053 #else |
2042 tst(s1); | 2054 tst(s1); |
2043 br ( notZero, a, p, L ); | 2055 br ( notZero, a, p, L ); |
2044 #endif | 2056 #endif |
2057 } | |
2058 | |
2059 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, | |
2060 Register s1, address d, | |
2061 relocInfo::relocType rt ) { | |
2062 if (VM_Version::v9_instructions_work()) { | |
2063 bpr(rc, a, p, s1, d, rt); | |
2064 } else { | |
2065 tst(s1); | |
2066 br(reg_cond_to_cc_cond(rc), a, p, d, rt); | |
2067 } | |
2068 } | |
2069 | |
2070 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, | |
2071 Register s1, Label& L ) { | |
2072 if (VM_Version::v9_instructions_work()) { | |
2073 bpr(rc, a, p, s1, L); | |
2074 } else { | |
2075 tst(s1); | |
2076 br(reg_cond_to_cc_cond(rc), a, p, L); | |
2077 } | |
2045 } | 2078 } |
2046 | 2079 |
2047 | 2080 |
2048 // instruction sequences factored across compiler & interpreter | 2081 // instruction sequences factored across compiler & interpreter |
2049 | 2082 |
3224 // make sure arguments make sense | 3257 // make sure arguments make sense |
3225 assert_different_registers(obj, var_size_in_bytes, t1, t2); | 3258 assert_different_registers(obj, var_size_in_bytes, t1, t2); |
3226 assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size"); | 3259 assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size"); |
3227 assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); | 3260 assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); |
3228 | 3261 |
3229 // get eden boundaries | 3262 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { |
3230 // note: we need both top & top_addr! | 3263 // No allocation in the shared eden. |
3231 const Register top_addr = t1; | 3264 br(Assembler::always, false, Assembler::pt, slow_case); |
3232 const Register end = t2; | 3265 delayed()->nop(); |
3233 | 3266 } else { |
3234 CollectedHeap* ch = Universe::heap(); | 3267 // get eden boundaries |
3235 set((intx)ch->top_addr(), top_addr); | 3268 // note: we need both top & top_addr! |
3236 intx delta = (intx)ch->end_addr() - (intx)ch->top_addr(); | 3269 const Register top_addr = t1; |
3237 ld_ptr(top_addr, delta, end); | 3270 const Register end = t2; |
3238 ld_ptr(top_addr, 0, obj); | 3271 |
3239 | 3272 CollectedHeap* ch = Universe::heap(); |
3240 // try to allocate | 3273 set((intx)ch->top_addr(), top_addr); |
3241 Label retry; | 3274 intx delta = (intx)ch->end_addr() - (intx)ch->top_addr(); |
3242 bind(retry); | 3275 ld_ptr(top_addr, delta, end); |
3276 ld_ptr(top_addr, 0, obj); | |
3277 | |
3278 // try to allocate | |
3279 Label retry; | |
3280 bind(retry); | |
3243 #ifdef ASSERT | 3281 #ifdef ASSERT |
3244 // make sure eden top is properly aligned | 3282 // make sure eden top is properly aligned |
3245 { | 3283 { |
3246 Label L; | 3284 Label L; |
3247 btst(MinObjAlignmentInBytesMask, obj); | 3285 btst(MinObjAlignmentInBytesMask, obj); |
3248 br(Assembler::zero, false, Assembler::pt, L); | 3286 br(Assembler::zero, false, Assembler::pt, L); |
3249 delayed()->nop(); | 3287 delayed()->nop(); |
3250 stop("eden top is not properly aligned"); | 3288 stop("eden top is not properly aligned"); |
3251 bind(L); | 3289 bind(L); |
3252 } | 3290 } |
3253 #endif // ASSERT | 3291 #endif // ASSERT |
3254 const Register free = end; | 3292 const Register free = end; |
3255 sub(end, obj, free); // compute amount of free space | 3293 sub(end, obj, free); // compute amount of free space |
3256 if (var_size_in_bytes->is_valid()) { | 3294 if (var_size_in_bytes->is_valid()) { |
3257 // size is unknown at compile time | 3295 // size is unknown at compile time |
3258 cmp(free, var_size_in_bytes); | 3296 cmp(free, var_size_in_bytes); |
3259 br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case | 3297 br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case |
3260 delayed()->add(obj, var_size_in_bytes, end); | 3298 delayed()->add(obj, var_size_in_bytes, end); |
3261 } else { | 3299 } else { |
3262 // size is known at compile time | 3300 // size is known at compile time |
3263 cmp(free, con_size_in_bytes); | 3301 cmp(free, con_size_in_bytes); |
3264 br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case | 3302 br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case |
3265 delayed()->add(obj, con_size_in_bytes, end); | 3303 delayed()->add(obj, con_size_in_bytes, end); |
3266 } | 3304 } |
3267 // Compare obj with the value at top_addr; if still equal, swap the value of | 3305 // Compare obj with the value at top_addr; if still equal, swap the value of |
3268 // end with the value at top_addr. If not equal, read the value at top_addr | 3306 // end with the value at top_addr. If not equal, read the value at top_addr |
3269 // into end. | 3307 // into end. |
3270 casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); | 3308 casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); |
3271 // if someone beat us on the allocation, try again, otherwise continue | 3309 // if someone beat us on the allocation, try again, otherwise continue |
3272 cmp(obj, end); | 3310 cmp(obj, end); |
3273 brx(Assembler::notEqual, false, Assembler::pn, retry); | 3311 brx(Assembler::notEqual, false, Assembler::pn, retry); |
3274 delayed()->mov(end, obj); // nop if successful since obj == end | 3312 delayed()->mov(end, obj); // nop if successful since obj == end |
3275 | 3313 |
3276 #ifdef ASSERT | 3314 #ifdef ASSERT |
3277 // make sure eden top is properly aligned | 3315 // make sure eden top is properly aligned |
3278 { | 3316 { |
3279 Label L; | 3317 Label L; |
3280 const Register top_addr = t1; | 3318 const Register top_addr = t1; |
3281 | 3319 |
3282 set((intx)ch->top_addr(), top_addr); | 3320 set((intx)ch->top_addr(), top_addr); |
3283 ld_ptr(top_addr, 0, top_addr); | 3321 ld_ptr(top_addr, 0, top_addr); |
3284 btst(MinObjAlignmentInBytesMask, top_addr); | 3322 btst(MinObjAlignmentInBytesMask, top_addr); |
3285 br(Assembler::zero, false, Assembler::pt, L); | 3323 br(Assembler::zero, false, Assembler::pt, L); |
3286 delayed()->nop(); | 3324 delayed()->nop(); |
3287 stop("eden top is not properly aligned"); | 3325 stop("eden top is not properly aligned"); |
3288 bind(L); | 3326 bind(L); |
3289 } | 3327 } |
3290 #endif // ASSERT | 3328 #endif // ASSERT |
3329 } | |
3291 } | 3330 } |
3292 | 3331 |
3293 | 3332 |
3294 void MacroAssembler::tlab_allocate( | 3333 void MacroAssembler::tlab_allocate( |
3295 Register obj, // result: pointer to object after successful allocation | 3334 Register obj, // result: pointer to object after successful allocation |
3535 set((-i*offset)+STACK_BIAS, Rscratch); | 3574 set((-i*offset)+STACK_BIAS, Rscratch); |
3536 st(G0, Rtsp, Rscratch); | 3575 st(G0, Rtsp, Rscratch); |
3537 } | 3576 } |
3538 } | 3577 } |
3539 | 3578 |
3579 /////////////////////////////////////////////////////////////////////////////////// | |
3580 #ifndef SERIALGC | |
3581 | |
3582 static uint num_stores = 0; | |
3583 static uint num_null_pre_stores = 0; | |
3584 | |
3585 static void count_null_pre_vals(void* pre_val) { | |
3586 num_stores++; | |
3587 if (pre_val == NULL) num_null_pre_stores++; | |
3588 if ((num_stores % 1000000) == 0) { | |
3589 tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.", | |
3590 num_stores, num_null_pre_stores, | |
3591 100.0*(float)num_null_pre_stores/(float)num_stores); | |
3592 } | |
3593 } | |
3594 | |
3595 static address satb_log_enqueue_with_frame = 0; | |
3596 static u_char* satb_log_enqueue_with_frame_end = 0; | |
3597 | |
3598 static address satb_log_enqueue_frameless = 0; | |
3599 static u_char* satb_log_enqueue_frameless_end = 0; | |
3600 | |
3601 static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions? | |
3602 | |
3603 // The calls to this don't work. We'd need to do a fair amount of work to | |
3604 // make it work. | |
3605 static void check_index(int ind) { | |
3606 assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0), | |
3607 "Invariants.") | |
3608 } | |
3609 | |
3610 static void generate_satb_log_enqueue(bool with_frame) { | |
3611 BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize); | |
3612 CodeBuffer buf(bb->instructions_begin(), bb->instructions_size()); | |
3613 MacroAssembler masm(&buf); | |
3614 address start = masm.pc(); | |
3615 Register pre_val; | |
3616 | |
3617 Label refill, restart; | |
3618 if (with_frame) { | |
3619 masm.save_frame(0); | |
3620 pre_val = I0; // Was O0 before the save. | |
3621 } else { | |
3622 pre_val = O0; | |
3623 } | |
3624 int satb_q_index_byte_offset = | |
3625 in_bytes(JavaThread::satb_mark_queue_offset() + | |
3626 PtrQueue::byte_offset_of_index()); | |
3627 int satb_q_buf_byte_offset = | |
3628 in_bytes(JavaThread::satb_mark_queue_offset() + | |
3629 PtrQueue::byte_offset_of_buf()); | |
3630 assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) && | |
3631 in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t), | |
3632 "check sizes in assembly below"); | |
3633 | |
3634 masm.bind(restart); | |
3635 masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0); | |
3636 | |
3637 masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill); | |
3638 // If the branch is taken, no harm in executing this in the delay slot. | |
3639 masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); | |
3640 masm.sub(L0, oopSize, L0); | |
3641 | |
3642 masm.st_ptr(pre_val, L1, L0); // [_buf + index] := pre_val | |
3643 if (!with_frame) { | |
3644 // Use return-from-leaf | |
3645 masm.retl(); | |
3646 masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset); | |
3647 } else { | |
3648 // Not delayed. | |
3649 masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset); | |
3650 } | |
3651 if (with_frame) { | |
3652 masm.ret(); | |
3653 masm.delayed()->restore(); | |
3654 } | |
3655 masm.bind(refill); | |
3656 | |
3657 address handle_zero = | |
3658 CAST_FROM_FN_PTR(address, | |
3659 &SATBMarkQueueSet::handle_zero_index_for_thread); | |
3660 // This should be rare enough that we can afford to save all the | |
3661 // scratch registers that the calling context might be using. | |
3662 masm.mov(G1_scratch, L0); | |
3663 masm.mov(G3_scratch, L1); | |
3664 masm.mov(G4, L2); | |
3665 // We need the value of O0 above (for the write into the buffer), so we | |
3666 // save and restore it. | |
3667 masm.mov(O0, L3); | |
3668 // Since the call will overwrite O7, we save and restore that, as well. | |
3669 masm.mov(O7, L4); | |
3670 masm.call_VM_leaf(L5, handle_zero, G2_thread); | |
3671 masm.mov(L0, G1_scratch); | |
3672 masm.mov(L1, G3_scratch); | |
3673 masm.mov(L2, G4); | |
3674 masm.mov(L3, O0); | |
3675 masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); | |
3676 masm.delayed()->mov(L4, O7); | |
3677 | |
3678 if (with_frame) { | |
3679 satb_log_enqueue_with_frame = start; | |
3680 satb_log_enqueue_with_frame_end = masm.pc(); | |
3681 } else { | |
3682 satb_log_enqueue_frameless = start; | |
3683 satb_log_enqueue_frameless_end = masm.pc(); | |
3684 } | |
3685 } | |
3686 | |
3687 static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) { | |
3688 if (with_frame) { | |
3689 if (satb_log_enqueue_with_frame == 0) { | |
3690 generate_satb_log_enqueue(with_frame); | |
3691 assert(satb_log_enqueue_with_frame != 0, "postcondition."); | |
3692 if (G1SATBPrintStubs) { | |
3693 tty->print_cr("Generated with-frame satb enqueue:"); | |
3694 Disassembler::decode((u_char*)satb_log_enqueue_with_frame, | |
3695 satb_log_enqueue_with_frame_end, | |
3696 tty); | |
3697 } | |
3698 } | |
3699 } else { | |
3700 if (satb_log_enqueue_frameless == 0) { | |
3701 generate_satb_log_enqueue(with_frame); | |
3702 assert(satb_log_enqueue_frameless != 0, "postcondition."); | |
3703 if (G1SATBPrintStubs) { | |
3704 tty->print_cr("Generated frameless satb enqueue:"); | |
3705 Disassembler::decode((u_char*)satb_log_enqueue_frameless, | |
3706 satb_log_enqueue_frameless_end, | |
3707 tty); | |
3708 } | |
3709 } | |
3710 } | |
3711 } | |
3712 | |
3713 void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) { | |
3714 assert(offset == 0 || index == noreg, "choose one"); | |
3715 | |
3716 if (G1DisablePreBarrier) return; | |
3717 // satb_log_barrier(tmp, obj, offset, preserve_o_regs); | |
3718 Label filtered; | |
3719 // satb_log_barrier_work0(tmp, filtered); | |
3720 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { | |
3721 ld(G2, | |
3722 in_bytes(JavaThread::satb_mark_queue_offset() + | |
3723 PtrQueue::byte_offset_of_active()), | |
3724 tmp); | |
3725 } else { | |
3726 guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, | |
3727 "Assumption"); | |
3728 ldsb(G2, | |
3729 in_bytes(JavaThread::satb_mark_queue_offset() + | |
3730 PtrQueue::byte_offset_of_active()), | |
3731 tmp); | |
3732 } | |
3733 // Check on whether to annul. | |
3734 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); | |
3735 delayed() -> nop(); | |
3736 | |
3737 // satb_log_barrier_work1(tmp, offset); | |
3738 if (index == noreg) { | |
3739 if (Assembler::is_simm13(offset)) { | |
3740 ld_ptr(obj, offset, tmp); | |
3741 } else { | |
3742 set(offset, tmp); | |
3743 ld_ptr(obj, tmp, tmp); | |
3744 } | |
3745 } else { | |
3746 ld_ptr(obj, index, tmp); | |
3747 } | |
3748 | |
3749 // satb_log_barrier_work2(obj, tmp, offset); | |
3750 | |
3751 // satb_log_barrier_work3(tmp, filtered, preserve_o_regs); | |
3752 | |
3753 const Register pre_val = tmp; | |
3754 | |
3755 if (G1SATBBarrierPrintNullPreVals) { | |
3756 save_frame(0); | |
3757 mov(pre_val, O0); | |
3758 // Save G-regs that target may use. | |
3759 mov(G1, L1); | |
3760 mov(G2, L2); | |
3761 mov(G3, L3); | |
3762 mov(G4, L4); | |
3763 mov(G5, L5); | |
3764 call(CAST_FROM_FN_PTR(address, &count_null_pre_vals)); | |
3765 delayed()->nop(); | |
3766 // Restore G-regs that target may have used. | |
3767 mov(L1, G1); | |
3768 mov(L2, G2); | |
3769 mov(L3, G3); | |
3770 mov(L4, G4); | |
3771 mov(L5, G5); | |
3772 restore(G0, G0, G0); | |
3773 } | |
3774 | |
3775 // Check on whether to annul. | |
3776 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered); | |
3777 delayed() -> nop(); | |
3778 | |
3779 // OK, it's not filtered, so we'll need to call enqueue. In the normal | |
3780 // case, pre_val will be a scratch G-reg, but there are some cases in which | |
3781 // it's an O-reg. In the first case, do a normal call. In the latter, | |
3782 // do a save here and call the frameless version. | |
3783 | |
3784 guarantee(pre_val->is_global() || pre_val->is_out(), | |
3785 "Or we need to think harder."); | |
3786 if (pre_val->is_global() && !preserve_o_regs) { | |
3787 generate_satb_log_enqueue_if_necessary(true); // with frame. | |
3788 call(satb_log_enqueue_with_frame); | |
3789 delayed()->mov(pre_val, O0); | |
3790 } else { | |
3791 generate_satb_log_enqueue_if_necessary(false); // frameless version. | |
3792 save_frame(0); | |
3793 call(satb_log_enqueue_frameless); | |
3794 delayed()->mov(pre_val->after_save(), O0); | |
3795 restore(); | |
3796 } | |
3797 | |
3798 bind(filtered); | |
3799 } | |
3800 | |
3801 static jint num_ct_writes = 0; | |
3802 static jint num_ct_writes_filtered_in_hr = 0; | |
3803 static jint num_ct_writes_filtered_null = 0; | |
3804 static jint num_ct_writes_filtered_pop = 0; | |
3805 static G1CollectedHeap* g1 = NULL; | |
3806 | |
3807 static Thread* count_ct_writes(void* filter_val, void* new_val) { | |
3808 Atomic::inc(&num_ct_writes); | |
3809 if (filter_val == NULL) { | |
3810 Atomic::inc(&num_ct_writes_filtered_in_hr); | |
3811 } else if (new_val == NULL) { | |
3812 Atomic::inc(&num_ct_writes_filtered_null); | |
3813 } else { | |
3814 if (g1 == NULL) { | |
3815 g1 = G1CollectedHeap::heap(); | |
3816 } | |
3817 if ((HeapWord*)new_val < g1->popular_object_boundary()) { | |
3818 Atomic::inc(&num_ct_writes_filtered_pop); | |
3819 } | |
3820 } | |
3821 if ((num_ct_writes % 1000000) == 0) { | |
3822 jint num_ct_writes_filtered = | |
3823 num_ct_writes_filtered_in_hr + | |
3824 num_ct_writes_filtered_null + | |
3825 num_ct_writes_filtered_pop; | |
3826 | |
3827 tty->print_cr("%d potential CT writes: %5.2f%% filtered\n" | |
3828 " (%5.2f%% intra-HR, %5.2f%% null, %5.2f%% popular).", | |
3829 num_ct_writes, | |
3830 100.0*(float)num_ct_writes_filtered/(float)num_ct_writes, | |
3831 100.0*(float)num_ct_writes_filtered_in_hr/ | |
3832 (float)num_ct_writes, | |
3833 100.0*(float)num_ct_writes_filtered_null/ | |
3834 (float)num_ct_writes, | |
3835 100.0*(float)num_ct_writes_filtered_pop/ | |
3836 (float)num_ct_writes); | |
3837 } | |
3838 return Thread::current(); | |
3839 } | |
3840 | |
3841 static address dirty_card_log_enqueue = 0; | |
3842 static u_char* dirty_card_log_enqueue_end = 0; | |
3843 | |
3844 // This gets to assume that o0 contains the object address. | |
3845 static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) { | |
3846 BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2); | |
3847 CodeBuffer buf(bb->instructions_begin(), bb->instructions_size()); | |
3848 MacroAssembler masm(&buf); | |
3849 address start = masm.pc(); | |
3850 | |
3851 Label not_already_dirty, restart, refill; | |
3852 | |
3853 #ifdef _LP64 | |
3854 masm.srlx(O0, CardTableModRefBS::card_shift, O0); | |
3855 #else | |
3856 masm.srl(O0, CardTableModRefBS::card_shift, O0); | |
3857 #endif | |
3858 Address rs(O1, (address)byte_map_base); | |
3859 masm.load_address(rs); // O1 := <card table base> | |
3860 masm.ldub(O0, O1, O2); // O2 := [O0 + O1] | |
3861 | |
3862 masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, | |
3863 O2, not_already_dirty); | |
3864 // Get O0 + O1 (the card address) into O3 by itself -- useful in the take-the-branch | |
3865 // case, harmless if not. | |
3866 masm.delayed()->add(O0, O1, O3); | |
3867 | |
3868 // We didn't take the branch, so we're already dirty: return. | |
3869 // Use return-from-leaf | |
3870 masm.retl(); | |
3871 masm.delayed()->nop(); | |
3872 | |
3873 // Not dirty. | |
3874 masm.bind(not_already_dirty); | |
3875 // First, dirty it. | |
3876 masm.stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). | |
3877 int dirty_card_q_index_byte_offset = | |
3878 in_bytes(JavaThread::dirty_card_queue_offset() + | |
3879 PtrQueue::byte_offset_of_index()); | |
3880 int dirty_card_q_buf_byte_offset = | |
3881 in_bytes(JavaThread::dirty_card_queue_offset() + | |
3882 PtrQueue::byte_offset_of_buf()); | |
3883 masm.bind(restart); | |
3884 masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); | |
3885 | |
3886 masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, | |
3887 L0, refill); | |
3888 // If the branch is taken, no harm in executing this in the delay slot. | |
3889 masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); | |
3890 masm.sub(L0, oopSize, L0); | |
3891 | |
3892 masm.st_ptr(O3, L1, L0); // [_buf + index] := O3 | |
3893 // Use return-from-leaf | |
3894 masm.retl(); | |
3895 masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset); | |
3896 | |
3897 masm.bind(refill); | |
3898 address handle_zero = | |
3899 CAST_FROM_FN_PTR(address, | |
3900 &DirtyCardQueueSet::handle_zero_index_for_thread); | |
3901 // This should be rare enough that we can afford to save all the | |
3902 // scratch registers that the calling context might be using. | |
3903 masm.mov(G1_scratch, L3); | |
3904 masm.mov(G3_scratch, L5); | |
3905 // We need the value of O3 above (for the write into the buffer), so we | |
3906 // save and restore it. | |
3907 masm.mov(O3, L6); | |
3908 // Since the call will overwrite O7, we save and restore that, as well. | |
3909 masm.mov(O7, L4); | |
3910 | |
3911 masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread); | |
3912 masm.mov(L3, G1_scratch); | |
3913 masm.mov(L5, G3_scratch); | |
3914 masm.mov(L6, O3); | |
3915 masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); | |
3916 masm.delayed()->mov(L4, O7); | |
3917 | |
3918 dirty_card_log_enqueue = start; | |
3919 dirty_card_log_enqueue_end = masm.pc(); | |
3920 // XXX Should have a guarantee here about not going off the end! | |
3921 // Does it already do so? Do an experiment... | |
3922 } | |
3923 | |
3924 static inline void | |
3925 generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) { | |
3926 if (dirty_card_log_enqueue == 0) { | |
3927 generate_dirty_card_log_enqueue(byte_map_base); | |
3928 assert(dirty_card_log_enqueue != 0, "postcondition."); | |
3929 if (G1SATBPrintStubs) { | |
3930 tty->print_cr("Generated dirty_card enqueue:"); | |
3931 Disassembler::decode((u_char*)dirty_card_log_enqueue, | |
3932 dirty_card_log_enqueue_end, | |
3933 tty); | |
3934 } | |
3935 } | |
3936 } | |
3937 | |
3938 | |
3939 void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) { | |
3940 | |
3941 Label filtered; | |
3942 MacroAssembler* post_filter_masm = this; | |
3943 | |
3944 if (new_val == G0) return; | |
3945 if (G1DisablePostBarrier) return; | |
3946 | |
3947 G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set(); | |
3948 assert(bs->kind() == BarrierSet::G1SATBCT || | |
3949 bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier"); | |
3950 if (G1RSBarrierRegionFilter) { | |
3951 xor3(store_addr, new_val, tmp); | |
3952 #ifdef _LP64 | |
3953 srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp); | |
3954 #else | |
3955 srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp); | |
3956 #endif | |
3957 if (G1PrintCTFilterStats) { | |
3958 guarantee(tmp->is_global(), "Or stats won't work..."); | |
3959 // This is a sleazy hack: I'm temporarily hijacking G2, which I | |
3960 // promise to restore. | |
3961 mov(new_val, G2); | |
3962 save_frame(0); | |
3963 mov(tmp, O0); | |
3964 mov(G2, O1); | |
3965 // Save G-regs that target may use. | |
3966 mov(G1, L1); | |
3967 mov(G2, L2); | |
3968 mov(G3, L3); | |
3969 mov(G4, L4); | |
3970 mov(G5, L5); | |
3971 call(CAST_FROM_FN_PTR(address, &count_ct_writes)); | |
3972 delayed()->nop(); | |
3973 mov(O0, G2); | |
3974 // Restore G-regs that target may have used. | |
3975 mov(L1, G1); | |
3976 mov(L3, G3); | |
3977 mov(L4, G4); | |
3978 mov(L5, G5); | |
3979 restore(G0, G0, G0); | |
3980 } | |
3981 // XXX Should I predict this taken or not? Does it matter? | |
3982 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); | |
3983 delayed()->nop(); | |
3984 } | |
3985 | |
3986 // Now we decide how to generate the card table write. If we're | |
3987 // enqueueing, we call out to a generated function. Otherwise, we do it | |
3988 // inline here. | |
3989 | |
3990 if (G1RSBarrierUseQueue) { | |
3991 // If the "store_addr" register is an "in" or "local" register, move it to | |
3992 // a scratch reg so we can pass it as an argument. | |
3993 bool use_scr = !(store_addr->is_global() || store_addr->is_out()); | |
3994 // Pick a scratch register different from "tmp". | |
3995 Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch); | |
3996 // Make sure we use up the delay slot! | |
3997 if (use_scr) { | |
3998 post_filter_masm->mov(store_addr, scr); | |
3999 } else { | |
4000 post_filter_masm->nop(); | |
4001 } | |
4002 generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base); | |
4003 save_frame(0); | |
4004 call(dirty_card_log_enqueue); | |
4005 if (use_scr) { | |
4006 delayed()->mov(scr, O0); | |
4007 } else { | |
4008 delayed()->mov(store_addr->after_save(), O0); | |
4009 } | |
4010 restore(); | |
4011 | |
4012 } else { | |
4013 | |
4014 #ifdef _LP64 | |
4015 post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr); | |
4016 #else | |
4017 post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr); | |
4018 #endif | |
4019 assert( tmp != store_addr, "need separate temp reg"); | |
4020 Address rs(tmp, (address)bs->byte_map_base); | |
4021 load_address(rs); | |
4022 stb(G0, rs.base(), store_addr); | |
4023 } | |
4024 | |
4025 bind(filtered); | |
4026 | |
4027 } | |
4028 | |
4029 #endif // SERIALGC | |
4030 /////////////////////////////////////////////////////////////////////////////////// | |
4031 | |
4032 void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) { | |
4033 // If we're writing constant NULL, we can skip the write barrier. | |
4034 if (new_val == G0) return; | |
4035 CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set(); | |
4036 assert(bs->kind() == BarrierSet::CardTableModRef || | |
4037 bs->kind() == BarrierSet::CardTableExtension, "wrong barrier"); | |
4038 card_table_write(bs->byte_map_base, tmp, store_addr); | |
4039 } | |
4040 | |
3540 void MacroAssembler::load_klass(Register s, Register d) { | 4041 void MacroAssembler::load_klass(Register s, Register d) { |
3541 // The number of bytes in this code is used by | 4042 // The number of bytes in this code is used by |
3542 // MachCallDynamicJavaNode::ret_addr_offset() | 4043 // MachCallDynamicJavaNode::ret_addr_offset() |
3543 // if this changes, change that. | 4044 // if this changes, change that. |
3544 if (UseCompressedOops) { | 4045 if (UseCompressedOops) { |