# HG changeset patch
# User ysr
# Date 1212706676 25200
# Node ID 37f87013dfd831979f18702ebddc42cceeb2b445
# Parent 0b27f3512f9eaba6da4e866b3887c38850408055
6711316: Open source the Garbage-First garbage collector
Summary: First mercurial integration of the code for the Garbage-First garbage collector.
Reviewed-by: apetrusenko, iveresov, jmasa, sgoldman, tonyp, ysr

diff -r 0b27f3512f9e -r 37f87013dfd8 make/linux/makefiles/top.make
--- a/make/linux/makefiles/top.make Wed Jun 04 13:51:09 2008 -0700
+++ b/make/linux/makefiles/top.make Thu Jun 05 15:57:56 2008 -0700
@@ -64,6 +64,7 @@
   $(VM)/gc_implementation/includeDB_gc_parallelScavenge \
   $(VM)/gc_implementation/includeDB_gc_concurrentMarkSweep \
   $(VM)/gc_implementation/includeDB_gc_parNew \
+  $(VM)/gc_implementation/includeDB_gc_g1 \
   $(VM)/gc_implementation/includeDB_gc_serial \
   $(VM)/gc_implementation/includeDB_gc_shared

diff -r 0b27f3512f9e -r 37f87013dfd8 make/solaris/makefiles/top.make
--- a/make/solaris/makefiles/top.make Wed Jun 04 13:51:09 2008 -0700
+++ b/make/solaris/makefiles/top.make Thu Jun 05 15:57:56 2008 -0700
@@ -54,6 +54,7 @@
   $(VM)/gc_implementation/includeDB_gc_parallelScavenge \
   $(VM)/gc_implementation/includeDB_gc_concurrentMarkSweep \
   $(VM)/gc_implementation/includeDB_gc_parNew \
+  $(VM)/gc_implementation/includeDB_gc_g1 \
   $(VM)/gc_implementation/includeDB_gc_serial \
   $(VM)/gc_implementation/includeDB_gc_shared

diff -r 0b27f3512f9e -r 37f87013dfd8 make/windows/makefiles/generated.make
--- a/make/windows/makefiles/generated.make Wed Jun 04 13:51:09 2008 -0700
+++ b/make/windows/makefiles/generated.make Thu Jun 05 15:57:56 2008 -0700
@@ -50,7 +50,8 @@
   $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge \
   $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_shared \
   $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_parNew \
-  $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep
+  $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep \
+  $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_g1

 IncludeDBs_core=$(IncludeDBs_base) $(IncludeDBs_gc) \
   $(WorkSpace)/src/share/vm/includeDB_features

diff -r 0b27f3512f9e -r 37f87013dfd8 make/windows/makefiles/makedeps.make
--- a/make/windows/makefiles/makedeps.make Wed Jun 04 13:51:09 2008 -0700
+++ b/make/windows/makefiles/makedeps.make Thu Jun 05 15:57:56 2008 -0700
@@ -64,6 +64,7 @@
   -relativeInclude src\share\vm\gc_implementation\shared \
   -relativeInclude src\share\vm\gc_implementation\parNew \
   -relativeInclude src\share\vm\gc_implementation\concurrentMarkSweep \
+  -relativeInclude src\share\vm\gc_implementation\g1 \
   -relativeInclude src\share\vm\gc_interface \
   -relativeInclude src\share\vm\asm \
   -relativeInclude src\share\vm\memory \
@@ -115,6 +116,7 @@
   -additionalFile includeDB_gc_parallel \
   -additionalFile includeDB_gc_parallelScavenge \
   -additionalFile includeDB_gc_concurrentMarkSweep \
+  -additionalFile includeDB_gc_g1 \
   -additionalFile includeDB_gc_parNew \
   -additionalFile includeDB_gc_shared \
   -additionalFile includeDB_gc_serial \

diff -r 0b27f3512f9e -r 37f87013dfd8 make/windows/makefiles/vm.make
--- a/make/windows/makefiles/vm.make Wed Jun 04 13:51:09 2008 -0700
+++ b/make/windows/makefiles/vm.make Thu Jun 05 15:57:56 2008 -0700
@@ -110,6 +110,7 @@
   /I "$(WorkSpace)\src\share\vm\gc_implementation\shared"\
   /I "$(WorkSpace)\src\share\vm\gc_implementation\parNew"\
   /I "$(WorkSpace)\src\share\vm\gc_implementation\concurrentMarkSweep"\
+  /I "$(WorkSpace)\src\share\vm\gc_implementation\g1"\
  /I
"$(WorkSpace)\src\share\vm\gc_interface"\ /I "$(WorkSpace)\src\share\vm\asm" \ /I "$(WorkSpace)\src\share\vm\memory" \ @@ -139,6 +140,7 @@ VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/shared VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/parNew VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/concurrentMarkSweep +VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/g1 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_interface VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/asm VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/memory @@ -215,6 +217,9 @@ {$(WorkSpace)\src\share\vm\gc_implementation\concurrentMarkSweep}.cpp.obj:: $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $< +{$(WorkSpace)\src\share\vm\gc_implementation\g1}.cpp.obj:: + $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $< + {$(WorkSpace)\src\share\vm\gc_interface}.cpp.obj:: $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $< diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/assembler_sparc.cpp --- a/src/cpu/sparc/vm/assembler_sparc.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -130,6 +130,20 @@ return 0x00; // illegal instruction 0x00000000 } +Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) { + switch (in) { + case rc_z: return equal; + case rc_lez: return lessEqual; + case rc_lz: return less; + case rc_nz: return notEqual; + case rc_gz: return greater; + case rc_gez: return greaterEqual; + default: + ShouldNotReachHere(); + } + return equal; +} + // Generate a bunch 'o stuff (including v9's #ifndef PRODUCT void Assembler::test_v9() { @@ -1213,31 +1227,19 @@ } -void MacroAssembler::store_check(Register tmp, Register obj) { - // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.) - - /* $$$ This stuff needs to go into one of the BarrierSet generator - functions. (The particular barrier sets will have to be friends of - MacroAssembler, I guess.) */ - BarrierSet* bs = Universe::heap()->barrier_set(); - assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); - CardTableModRefBS* ct = (CardTableModRefBS*)bs; - assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); +void MacroAssembler::card_table_write(jbyte* byte_map_base, + Register tmp, Register obj) { #ifdef _LP64 srlx(obj, CardTableModRefBS::card_shift, obj); #else srl(obj, CardTableModRefBS::card_shift, obj); #endif assert( tmp != obj, "need separate temp reg"); - Address rs(tmp, (address)ct->byte_map_base); + Address rs(tmp, (address)byte_map_base); load_address(rs); stb(G0, rs.base(), obj); } -void MacroAssembler::store_check(Register tmp, Register obj, Register offset) { - store_check(tmp, obj); -} - // %%% Note: The following six instructions have been moved, // unchanged, from assembler_sparc.inline.hpp. // They will be refactored at a later date. 
@@ -1648,11 +1650,21 @@ if (reg == G0) return; // always NULL, which is always an oop - char buffer[16]; + char buffer[64]; +#ifdef COMPILER1 + if (CommentedAssembly) { + snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset()); + block_comment(buffer); + } +#endif + + int len = strlen(file) + strlen(msg) + 1 + 4; sprintf(buffer, "%d", line); - int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer); + len += strlen(buffer); + sprintf(buffer, " at offset %d ", offset()); + len += strlen(buffer); char * real_msg = new char[len]; - sprintf(real_msg, "%s (%s:%d)", msg, file, line); + sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line); // Call indirectly to solve generation ordering problem Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address()); @@ -2044,6 +2056,27 @@ #endif } +void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, + Register s1, address d, + relocInfo::relocType rt ) { + if (VM_Version::v9_instructions_work()) { + bpr(rc, a, p, s1, d, rt); + } else { + tst(s1); + br(reg_cond_to_cc_cond(rc), a, p, d, rt); + } +} + +void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, + Register s1, Label& L ) { + if (VM_Version::v9_instructions_work()) { + bpr(rc, a, p, s1, L); + } else { + tst(s1); + br(reg_cond_to_cc_cond(rc), a, p, L); + } +} + // instruction sequences factored across compiler & interpreter @@ -3226,68 +3259,74 @@ assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size"); assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); - // get eden boundaries - // note: we need both top & top_addr! - const Register top_addr = t1; - const Register end = t2; - - CollectedHeap* ch = Universe::heap(); - set((intx)ch->top_addr(), top_addr); - intx delta = (intx)ch->end_addr() - (intx)ch->top_addr(); - ld_ptr(top_addr, delta, end); - ld_ptr(top_addr, 0, obj); - - // try to allocate - Label retry; - bind(retry); -#ifdef ASSERT - // make sure eden top is properly aligned - { - Label L; - btst(MinObjAlignmentInBytesMask, obj); - br(Assembler::zero, false, Assembler::pt, L); + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + // No allocation in the shared eden. + br(Assembler::always, false, Assembler::pt, slow_case); delayed()->nop(); - stop("eden top is not properly aligned"); - bind(L); - } + } else { + // get eden boundaries + // note: we need both top & top_addr! 
+ const Register top_addr = t1; + const Register end = t2; + + CollectedHeap* ch = Universe::heap(); + set((intx)ch->top_addr(), top_addr); + intx delta = (intx)ch->end_addr() - (intx)ch->top_addr(); + ld_ptr(top_addr, delta, end); + ld_ptr(top_addr, 0, obj); + + // try to allocate + Label retry; + bind(retry); +#ifdef ASSERT + // make sure eden top is properly aligned + { + Label L; + btst(MinObjAlignmentInBytesMask, obj); + br(Assembler::zero, false, Assembler::pt, L); + delayed()->nop(); + stop("eden top is not properly aligned"); + bind(L); + } #endif // ASSERT - const Register free = end; - sub(end, obj, free); // compute amount of free space - if (var_size_in_bytes->is_valid()) { - // size is unknown at compile time - cmp(free, var_size_in_bytes); - br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case - delayed()->add(obj, var_size_in_bytes, end); - } else { - // size is known at compile time - cmp(free, con_size_in_bytes); - br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case - delayed()->add(obj, con_size_in_bytes, end); - } - // Compare obj with the value at top_addr; if still equal, swap the value of - // end with the value at top_addr. If not equal, read the value at top_addr - // into end. - casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); - // if someone beat us on the allocation, try again, otherwise continue - cmp(obj, end); - brx(Assembler::notEqual, false, Assembler::pn, retry); - delayed()->mov(end, obj); // nop if successfull since obj == end + const Register free = end; + sub(end, obj, free); // compute amount of free space + if (var_size_in_bytes->is_valid()) { + // size is unknown at compile time + cmp(free, var_size_in_bytes); + br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case + delayed()->add(obj, var_size_in_bytes, end); + } else { + // size is known at compile time + cmp(free, con_size_in_bytes); + br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case + delayed()->add(obj, con_size_in_bytes, end); + } + // Compare obj with the value at top_addr; if still equal, swap the value of + // end with the value at top_addr. If not equal, read the value at top_addr + // into end. 
+ casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); + // if someone beat us on the allocation, try again, otherwise continue + cmp(obj, end); + brx(Assembler::notEqual, false, Assembler::pn, retry); + delayed()->mov(end, obj); // nop if successfull since obj == end #ifdef ASSERT - // make sure eden top is properly aligned - { - Label L; - const Register top_addr = t1; - - set((intx)ch->top_addr(), top_addr); - ld_ptr(top_addr, 0, top_addr); - btst(MinObjAlignmentInBytesMask, top_addr); - br(Assembler::zero, false, Assembler::pt, L); - delayed()->nop(); - stop("eden top is not properly aligned"); - bind(L); + // make sure eden top is properly aligned + { + Label L; + const Register top_addr = t1; + + set((intx)ch->top_addr(), top_addr); + ld_ptr(top_addr, 0, top_addr); + btst(MinObjAlignmentInBytesMask, top_addr); + br(Assembler::zero, false, Assembler::pt, L); + delayed()->nop(); + stop("eden top is not properly aligned"); + bind(L); + } +#endif // ASSERT } -#endif // ASSERT } @@ -3537,6 +3576,468 @@ } } +/////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +static uint num_stores = 0; +static uint num_null_pre_stores = 0; + +static void count_null_pre_vals(void* pre_val) { + num_stores++; + if (pre_val == NULL) num_null_pre_stores++; + if ((num_stores % 1000000) == 0) { + tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.", + num_stores, num_null_pre_stores, + 100.0*(float)num_null_pre_stores/(float)num_stores); + } +} + +static address satb_log_enqueue_with_frame = 0; +static u_char* satb_log_enqueue_with_frame_end = 0; + +static address satb_log_enqueue_frameless = 0; +static u_char* satb_log_enqueue_frameless_end = 0; + +static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions? + +// The calls to this don't work. We'd need to do a fair amount of work to +// make it work. +static void check_index(int ind) { + assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0), + "Invariants.") +} + +static void generate_satb_log_enqueue(bool with_frame) { + BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize); + CodeBuffer buf(bb->instructions_begin(), bb->instructions_size()); + MacroAssembler masm(&buf); + address start = masm.pc(); + Register pre_val; + + Label refill, restart; + if (with_frame) { + masm.save_frame(0); + pre_val = I0; // Was O0 before the save. + } else { + pre_val = O0; + } + int satb_q_index_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index()); + int satb_q_buf_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf()); + assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) && + in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t), + "check sizes in assembly below"); + + masm.bind(restart); + masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0); + + masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill); + // If the branch is taken, no harm in executing this in the delay slot. + masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); + masm.sub(L0, oopSize, L0); + + masm.st_ptr(pre_val, L1, L0); // [_buf + index] := I0 + if (!with_frame) { + // Use return-from-leaf + masm.retl(); + masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset); + } else { + // Not delayed. 
+ masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset); + } + if (with_frame) { + masm.ret(); + masm.delayed()->restore(); + } + masm.bind(refill); + + address handle_zero = + CAST_FROM_FN_PTR(address, + &SATBMarkQueueSet::handle_zero_index_for_thread); + // This should be rare enough that we can afford to save all the + // scratch registers that the calling context might be using. + masm.mov(G1_scratch, L0); + masm.mov(G3_scratch, L1); + masm.mov(G4, L2); + // We need the value of O0 above (for the write into the buffer), so we + // save and restore it. + masm.mov(O0, L3); + // Since the call will overwrite O7, we save and restore that, as well. + masm.mov(O7, L4); + masm.call_VM_leaf(L5, handle_zero, G2_thread); + masm.mov(L0, G1_scratch); + masm.mov(L1, G3_scratch); + masm.mov(L2, G4); + masm.mov(L3, O0); + masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); + masm.delayed()->mov(L4, O7); + + if (with_frame) { + satb_log_enqueue_with_frame = start; + satb_log_enqueue_with_frame_end = masm.pc(); + } else { + satb_log_enqueue_frameless = start; + satb_log_enqueue_frameless_end = masm.pc(); + } +} + +static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) { + if (with_frame) { + if (satb_log_enqueue_with_frame == 0) { + generate_satb_log_enqueue(with_frame); + assert(satb_log_enqueue_with_frame != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated with-frame satb enqueue:"); + Disassembler::decode((u_char*)satb_log_enqueue_with_frame, + satb_log_enqueue_with_frame_end, + tty); + } + } + } else { + if (satb_log_enqueue_frameless == 0) { + generate_satb_log_enqueue(with_frame); + assert(satb_log_enqueue_frameless != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated frameless satb enqueue:"); + Disassembler::decode((u_char*)satb_log_enqueue_frameless, + satb_log_enqueue_frameless_end, + tty); + } + } + } +} + +void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) { + assert(offset == 0 || index == noreg, "choose one"); + + if (G1DisablePreBarrier) return; + // satb_log_barrier(tmp, obj, offset, preserve_o_regs); + Label filtered; + // satb_log_barrier_work0(tmp, filtered); + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + ld(G2, + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active()), + tmp); + } else { + guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, + "Assumption"); + ldsb(G2, + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active()), + tmp); + } + // Check on whether to annul. + br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); + delayed() -> nop(); + + // satb_log_barrier_work1(tmp, offset); + if (index == noreg) { + if (Assembler::is_simm13(offset)) { + ld_ptr(obj, offset, tmp); + } else { + set(offset, tmp); + ld_ptr(obj, tmp, tmp); + } + } else { + ld_ptr(obj, index, tmp); + } + + // satb_log_barrier_work2(obj, tmp, offset); + + // satb_log_barrier_work3(tmp, filtered, preserve_o_regs); + + const Register pre_val = tmp; + + if (G1SATBBarrierPrintNullPreVals) { + save_frame(0); + mov(pre_val, O0); + // Save G-regs that target may use. + mov(G1, L1); + mov(G2, L2); + mov(G3, L3); + mov(G4, L4); + mov(G5, L5); + call(CAST_FROM_FN_PTR(address, &count_null_pre_vals)); + delayed()->nop(); + // Restore G-regs that target may have used. 
+ mov(L1, G1); + mov(L2, G2); + mov(L3, G3); + mov(L4, G4); + mov(L5, G5); + restore(G0, G0, G0); + } + + // Check on whether to annul. + br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered); + delayed() -> nop(); + + // OK, it's not filtered, so we'll need to call enqueue. In the normal + // case, pre_val will be a scratch G-reg, but there's some cases in which + // it's an O-reg. In the first case, do a normal call. In the latter, + // do a save here and call the frameless version. + + guarantee(pre_val->is_global() || pre_val->is_out(), + "Or we need to think harder."); + if (pre_val->is_global() && !preserve_o_regs) { + generate_satb_log_enqueue_if_necessary(true); // with frame. + call(satb_log_enqueue_with_frame); + delayed()->mov(pre_val, O0); + } else { + generate_satb_log_enqueue_if_necessary(false); // with frameless. + save_frame(0); + call(satb_log_enqueue_frameless); + delayed()->mov(pre_val->after_save(), O0); + restore(); + } + + bind(filtered); +} + +static jint num_ct_writes = 0; +static jint num_ct_writes_filtered_in_hr = 0; +static jint num_ct_writes_filtered_null = 0; +static jint num_ct_writes_filtered_pop = 0; +static G1CollectedHeap* g1 = NULL; + +static Thread* count_ct_writes(void* filter_val, void* new_val) { + Atomic::inc(&num_ct_writes); + if (filter_val == NULL) { + Atomic::inc(&num_ct_writes_filtered_in_hr); + } else if (new_val == NULL) { + Atomic::inc(&num_ct_writes_filtered_null); + } else { + if (g1 == NULL) { + g1 = G1CollectedHeap::heap(); + } + if ((HeapWord*)new_val < g1->popular_object_boundary()) { + Atomic::inc(&num_ct_writes_filtered_pop); + } + } + if ((num_ct_writes % 1000000) == 0) { + jint num_ct_writes_filtered = + num_ct_writes_filtered_in_hr + + num_ct_writes_filtered_null + + num_ct_writes_filtered_pop; + + tty->print_cr("%d potential CT writes: %5.2f%% filtered\n" + " (%5.2f%% intra-HR, %5.2f%% null, %5.2f%% popular).", + num_ct_writes, + 100.0*(float)num_ct_writes_filtered/(float)num_ct_writes, + 100.0*(float)num_ct_writes_filtered_in_hr/ + (float)num_ct_writes, + 100.0*(float)num_ct_writes_filtered_null/ + (float)num_ct_writes, + 100.0*(float)num_ct_writes_filtered_pop/ + (float)num_ct_writes); + } + return Thread::current(); +} + +static address dirty_card_log_enqueue = 0; +static u_char* dirty_card_log_enqueue_end = 0; + +// This gets to assume that o0 contains the object address. +static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) { + BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2); + CodeBuffer buf(bb->instructions_begin(), bb->instructions_size()); + MacroAssembler masm(&buf); + address start = masm.pc(); + + Label not_already_dirty, restart, refill; + +#ifdef _LP64 + masm.srlx(O0, CardTableModRefBS::card_shift, O0); +#else + masm.srl(O0, CardTableModRefBS::card_shift, O0); +#endif + Address rs(O1, (address)byte_map_base); + masm.load_address(rs); // O1 := + masm.ldub(O0, O1, O2); // O2 := [O0 + O1] + + masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, + O2, not_already_dirty); + // Get O1 + O2 into a reg by itself -- useful in the take-the-branch + // case, harmless if not. + masm.delayed()->add(O0, O1, O3); + + // We didn't take the branch, so we're already dirty: return. + // Use return-from-leaf + masm.retl(); + masm.delayed()->nop(); + + // Not dirty. + masm.bind(not_already_dirty); + // First, dirty it. + masm.stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). 
+ int dirty_card_q_index_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index()); + int dirty_card_q_buf_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf()); + masm.bind(restart); + masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); + + masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, + L0, refill); + // If the branch is taken, no harm in executing this in the delay slot. + masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); + masm.sub(L0, oopSize, L0); + + masm.st_ptr(O3, L1, L0); // [_buf + index] := I0 + // Use return-from-leaf + masm.retl(); + masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset); + + masm.bind(refill); + address handle_zero = + CAST_FROM_FN_PTR(address, + &DirtyCardQueueSet::handle_zero_index_for_thread); + // This should be rare enough that we can afford to save all the + // scratch registers that the calling context might be using. + masm.mov(G1_scratch, L3); + masm.mov(G3_scratch, L5); + // We need the value of O3 above (for the write into the buffer), so we + // save and restore it. + masm.mov(O3, L6); + // Since the call will overwrite O7, we save and restore that, as well. + masm.mov(O7, L4); + + masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread); + masm.mov(L3, G1_scratch); + masm.mov(L5, G3_scratch); + masm.mov(L6, O3); + masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); + masm.delayed()->mov(L4, O7); + + dirty_card_log_enqueue = start; + dirty_card_log_enqueue_end = masm.pc(); + // XXX Should have a guarantee here about not going off the end! + // Does it already do so? Do an experiment... +} + +static inline void +generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) { + if (dirty_card_log_enqueue == 0) { + generate_dirty_card_log_enqueue(byte_map_base); + assert(dirty_card_log_enqueue != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated dirty_card enqueue:"); + Disassembler::decode((u_char*)dirty_card_log_enqueue, + dirty_card_log_enqueue_end, + tty); + } + } +} + + +void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) { + + Label filtered; + MacroAssembler* post_filter_masm = this; + + if (new_val == G0) return; + if (G1DisablePostBarrier) return; + + G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::G1SATBCT || + bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier"); + if (G1RSBarrierRegionFilter) { + xor3(store_addr, new_val, tmp); +#ifdef _LP64 + srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp); +#else + srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp); +#endif + if (G1PrintCTFilterStats) { + guarantee(tmp->is_global(), "Or stats won't work..."); + // This is a sleazy hack: I'm temporarily hijacking G2, which I + // promise to restore. + mov(new_val, G2); + save_frame(0); + mov(tmp, O0); + mov(G2, O1); + // Save G-regs that target may use. + mov(G1, L1); + mov(G2, L2); + mov(G3, L3); + mov(G4, L4); + mov(G5, L5); + call(CAST_FROM_FN_PTR(address, &count_ct_writes)); + delayed()->nop(); + mov(O0, G2); + // Restore G-regs that target may have used. + mov(L1, G1); + mov(L3, G3); + mov(L4, G4); + mov(L5, G5); + restore(G0, G0, G0); + } + // XXX Should I predict this taken or not? Does it mattern? 
+ br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); + delayed()->nop(); + } + + // Now we decide how to generate the card table write. If we're + // enqueueing, we call out to a generated function. Otherwise, we do it + // inline here. + + if (G1RSBarrierUseQueue) { + // If the "store_addr" register is an "in" or "local" register, move it to + // a scratch reg so we can pass it as an argument. + bool use_scr = !(store_addr->is_global() || store_addr->is_out()); + // Pick a scratch register different from "tmp". + Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch); + // Make sure we use up the delay slot! + if (use_scr) { + post_filter_masm->mov(store_addr, scr); + } else { + post_filter_masm->nop(); + } + generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base); + save_frame(0); + call(dirty_card_log_enqueue); + if (use_scr) { + delayed()->mov(scr, O0); + } else { + delayed()->mov(store_addr->after_save(), O0); + } + restore(); + + } else { + +#ifdef _LP64 + post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr); +#else + post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr); +#endif + assert( tmp != store_addr, "need separate temp reg"); + Address rs(tmp, (address)bs->byte_map_base); + load_address(rs); + stb(G0, rs.base(), store_addr); + } + + bind(filtered); + +} + +#endif // SERIALGC +/////////////////////////////////////////////////////////////////////////////////// + +void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) { + // If we're writing constant NULL, we can skip the write barrier. + if (new_val == G0) return; + CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef || + bs->kind() == BarrierSet::CardTableExtension, "wrong barrier"); + card_table_write(bs->byte_map_base, tmp, store_addr); +} + void MacroAssembler::load_klass(Register s, Register d) { // The number of bytes in this code is used by // MachCallDynamicJavaNode::ret_addr_offset() diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/assembler_sparc.hpp --- a/src/cpu/sparc/vm/assembler_sparc.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -1439,7 +1439,11 @@ // pp 214 void save( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | rs2(s2) ); } - void save( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } + void save( Register s1, int simm13a, Register d ) { + // make sure frame is at least large enough for the register save area + assert(-simm13a >= 16 * wordSize, "frame too small"); + emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); + } void restore( Register s1 = G0, Register s2 = G0, Register d = G0 ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | rs2(s2) ); } void restore( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } @@ -1594,6 +1598,11 @@ inline void wrasi( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25)); } inline void wrfprs( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); } + // For a given register condition, return the appropriate condition code + // Condition 
(the one you would use to get the same effect after "tst" on + // the target register.) + Assembler::Condition reg_cond_to_cc_cond(RCondition in); + // Creation Assembler(CodeBuffer* code) : AbstractAssembler(code) { @@ -1630,6 +1639,8 @@ // restore global registers in case C code disturbed them static void restore_registers(MacroAssembler* a, Register r); + + }; @@ -1722,6 +1733,12 @@ void br_null ( Register s1, bool a, Predict p, Label& L ); void br_notnull( Register s1, bool a, Predict p, Label& L ); + // These versions will do the most efficient thing on v8 and v9. Perhaps + // this is what the routine above was meant to do, but it didn't (and + // didn't cover both target address kinds.) + void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none ); + void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, Label& L); + inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); inline void bp( Condition c, bool a, CC cc, Predict p, Label& L ); @@ -2055,9 +2072,23 @@ #endif // ASSERT public: - // Stores - void store_check(Register tmp, Register obj); // store check for obj - register is destroyed afterwards - void store_check(Register tmp, Register obj, Register offset); // store check for obj - register is destroyed afterwards + + // Write to card table for - register is destroyed afterwards. + void card_table_write(jbyte* byte_map_base, Register tmp, Register obj); + + void card_write_barrier_post(Register store_addr, Register new_val, Register tmp); + +#ifndef SERIALGC + // Array store and offset + void g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs); + + void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp); + + // May do filtering, depending on the boolean arguments. 
+ void g1_card_table_write(jbyte* byte_map_base, + Register tmp, Register obj, Register new_val, + bool region_filter, bool null_filter); +#endif // SERIALGC // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack void push_fTOS(); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp --- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -404,4 +404,55 @@ } +/////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + + assert(pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = pre_val()->as_register(); + + ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false); + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, + pre_val_reg, _continuation); + __ delayed()->nop(); + + __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id)); + __ delayed()->mov(pre_val_reg, G4); + __ br(Assembler::always, false, Assembler::pt, _continuation); + __ delayed()->nop(); + +} + +jbyte* G1PostBarrierStub::_byte_map_base = NULL; + +jbyte* G1PostBarrierStub::byte_map_base_slow() { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->is_a(BarrierSet::G1SATBCTLogging), + "Must be if we're using this."); + return ((G1SATBCardTableModRefBS*)bs)->byte_map_base; +} + +void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + + assert(addr()->is_register(), "Precondition."); + assert(new_val()->is_register(), "Precondition."); + Register addr_reg = addr()->as_pointer_register(); + Register new_val_reg = new_val()->as_register(); + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, + new_val_reg, _continuation); + __ delayed()->nop(); + + __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_post_barrier_slow_id)); + __ delayed()->mov(addr_reg, G4); + __ br(Assembler::always, false, Assembler::pt, _continuation); + __ delayed()->nop(); +} + +#endif // SERIALGC +/////////////////////////////////////////////////////////////////////////////////// + #undef __ diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp --- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -2093,7 +2093,11 @@ // the known type isn't loaded since the code sanity checks // in debug mode and the type isn't required when we know the exact type // also check that the type is an array type. - if (op->expected_type() == NULL) { + // We also, for now, always call the stub if the barrier set requires a + // write_ref_pre barrier (which the stub does, but none of the optimized + // cases currently does). + if (op->expected_type() == NULL || + Universe::heap()->barrier_set()->has_write_ref_pre_barrier()) { __ mov(src, O0); __ mov(src_pos, O1); __ mov(dst, O2); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp --- a/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -365,6 +365,10 @@ __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info); } + if (obj_store) { + // Needs GC write barriers. 
+ pre_barrier(LIR_OprFact::address(array_addr), false, NULL); + } __ move(value.result(), array_addr, null_check_info); if (obj_store) { // Is this precise? @@ -663,6 +667,10 @@ __ add(obj.result(), offset.result(), addr); + if (type == objectType) { // Write-barrier needed for Object fields. + pre_barrier(obj.result(), false, NULL); + } + if (type == objectType) __ cas_obj(addr, cmp.result(), val.result(), t1, t2); else if (type == intType) @@ -677,7 +685,11 @@ LIR_Opr result = rlock_result(x); __ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0), result); if (type == objectType) { // Write-barrier needed for Object fields. +#ifdef PRECISE_CARDMARK + post_barrier(addr, val.result()); +#else post_barrier(obj.result(), val.result()); +#endif // PRECISE_CARDMARK } } @@ -1153,6 +1165,10 @@ addr = new LIR_Address(base_op, index_op, type); } + if (is_obj) { + pre_barrier(LIR_OprFact::address(addr), false, NULL); + // _bs->c1_write_barrier_pre(this, LIR_OprFact::address(addr)); + } __ move(data, addr); if (is_obj) { // This address is precise diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/c1_Runtime1_sparc.cpp --- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -832,6 +832,163 @@ } break; +#ifndef SERIALGC + case g1_pre_barrier_slow_id: + { // G4: previous value of memory + BarrierSet* bs = Universe::heap()->barrier_set(); + if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ save_frame(0); + __ set((int)id, O1); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0); + __ should_not_reach_here(); + break; + } + + __ set_info("g1_pre_barrier_slow_id", dont_gc_arguments); + + Register pre_val = G4; + Register tmp = G1_scratch; + Register tmp2 = G3_scratch; + + Label refill, restart; + bool with_frame = false; // I don't know if we can do with-frame. + int satb_q_index_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index()); + int satb_q_buf_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf()); + __ bind(restart); + __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp); + + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, + Assembler::pn, tmp, refill); + + // If the branch is taken, no harm in executing this in the delay slot. 
+ __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2); + __ sub(tmp, oopSize, tmp); + + __ st_ptr(pre_val, tmp2, tmp); // [_buf + index] := + // Use return-from-leaf + __ retl(); + __ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset); + + __ bind(refill); + __ save_frame(0); + + __ mov(pre_val, L0); + __ mov(tmp, L1); + __ mov(tmp2, L2); + + __ call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, + SATBMarkQueueSet::handle_zero_index_for_thread), + G2_thread); + + __ mov(L0, pre_val); + __ mov(L1, tmp); + __ mov(L2, tmp2); + + __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); + __ delayed()->restore(); + } + break; + + case g1_post_barrier_slow_id: + { + BarrierSet* bs = Universe::heap()->barrier_set(); + if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ save_frame(0); + __ set((int)id, O1); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0); + __ should_not_reach_here(); + break; + } + + __ set_info("g1_post_barrier_slow_id", dont_gc_arguments); + + Register addr = G4; + Register cardtable = G5; + Register tmp = G1_scratch; + Register tmp2 = G3_scratch; + jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base; + + Label not_already_dirty, restart, refill; + +#ifdef _LP64 + __ srlx(addr, CardTableModRefBS::card_shift, addr); +#else + __ srl(addr, CardTableModRefBS::card_shift, addr); +#endif + + Address rs(cardtable, (address)byte_map_base); + __ load_address(rs); // cardtable := + __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable] + + __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, + tmp, not_already_dirty); + // Get cardtable + tmp into a reg by itself -- useful in the take-the-branch + // case, harmless if not. + __ delayed()->add(addr, cardtable, tmp2); + + // We didn't take the branch, so we're already dirty: return. + // Use return-from-leaf + __ retl(); + __ delayed()->nop(); + + // Not dirty. + __ bind(not_already_dirty); + // First, dirty it. + __ stb(G0, tmp2, 0); // [cardPtr] := 0 (i.e., dirty). + + Register tmp3 = cardtable; + Register tmp4 = tmp; + + // these registers are now dead + addr = cardtable = tmp = noreg; + + int dirty_card_q_index_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index()); + int dirty_card_q_buf_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf()); + __ bind(restart); + __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3); + + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, + tmp3, refill); + // If the branch is taken, no harm in executing this in the delay slot. 
+ __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4); + __ sub(tmp3, oopSize, tmp3); + + __ st_ptr(tmp2, tmp4, tmp3); // [_buf + index] := + // Use return-from-leaf + __ retl(); + __ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset); + + __ bind(refill); + __ save_frame(0); + + __ mov(tmp2, L0); + __ mov(tmp3, L1); + __ mov(tmp4, L2); + + __ call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, + DirtyCardQueueSet::handle_zero_index_for_thread), + G2_thread); + + __ mov(L0, tmp2); + __ mov(L1, tmp3); + __ mov(L2, tmp4); + + __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); + __ delayed()->restore(); + } + break; +#endif // !SERIALGC + default: { __ set_info("unimplemented entry", dont_gc_arguments); __ save_frame(0); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/sharedRuntime_sparc.cpp --- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -699,17 +699,16 @@ // Stores long into offset pointed to by base void AdapterGenerator::store_c2i_long(Register r, Register base, const int st_off, bool is_stack) { -#ifdef COMPILER2 #ifdef _LP64 // In V9, longs are given 2 64-bit slots in the interpreter, but the // data is passed in only 1 slot. __ stx(r, base, next_arg_slot(st_off)); #else +#ifdef COMPILER2 // Misaligned store of 64-bit data __ stw(r, base, arg_slot(st_off)); // lo bits __ srlx(r, 32, r); __ stw(r, base, next_arg_slot(st_off)); // hi bits -#endif // _LP64 #else if (is_stack) { // Misaligned store of 64-bit data @@ -721,6 +720,7 @@ __ stw(r , base, next_arg_slot(st_off)); // hi bits } #endif // COMPILER2 +#endif // _LP64 tag_c2i_arg(frame::TagCategory2, base, st_off, r); } diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/stubGenerator_sparc.cpp --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1110,30 +1110,31 @@ // The input registers are overwritten. // void gen_write_ref_array_pre_barrier(Register addr, Register count) { -#if 0 // G1 only BarrierSet* bs = Universe::heap()->barrier_set(); if (bs->has_write_ref_pre_barrier()) { assert(bs->has_write_ref_array_pre_opt(), "Else unsupported barrier set."); - assert(addr->is_global() && count->is_global(), - "If not, then we have to fix this code to handle more " - "general cases."); - // Get some new fresh output registers. __ save_frame(0); // Save the necessary global regs... will be used after. - __ mov(addr, L0); - __ mov(count, L1); - - __ mov(addr, O0); + if (addr->is_global()) { + __ mov(addr, L0); + } + if (count->is_global()) { + __ mov(count, L1); + } + __ mov(addr->after_save(), O0); // Get the count into O1 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)); - __ delayed()->mov(count, O1); - __ mov(L0, addr); - __ mov(L1, count); + __ delayed()->mov(count->after_save(), O1); + if (addr->is_global()) { + __ mov(L0, addr); + } + if (count->is_global()) { + __ mov(L1, count); + } __ restore(); } -#endif // 0 } // // Generate post-write barrier for array. @@ -1150,22 +1151,17 @@ BarrierSet* bs = Universe::heap()->barrier_set(); switch (bs->kind()) { -#if 0 // G1 - only case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: { - assert(addr->is_global() && count->is_global(), - "If not, then we have to fix this code to handle more " - "general cases."); // Get some new fresh output registers. 
__ save_frame(0); - __ mov(addr, O0); + __ mov(addr->after_save(), O0); __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)); - __ delayed()->mov(count, O1); + __ delayed()->mov(count->after_save(), O1); __ restore(); } break; -#endif // 0 G1 - only case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: { @@ -2412,8 +2408,7 @@ StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); - gen_write_ref_array_pre_barrier(G1, G5); - + gen_write_ref_array_pre_barrier(O1, O2); #ifdef ASSERT // We sometimes save a frame (see partial_subtype_check below). diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/templateTable_sparc.cpp --- a/src/cpu/sparc/vm/templateTable_sparc.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/templateTable_sparc.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -28,6 +28,79 @@ #ifndef CC_INTERP #define __ _masm-> +// Misc helpers + +// Do an oop store like *(base + index + offset) = val +// index can be noreg, +static void do_oop_store(InterpreterMacroAssembler* _masm, + Register base, + Register index, + int offset, + Register val, + Register tmp, + BarrierSet::Name barrier, + bool precise) { + assert(tmp != val && tmp != base && tmp != index, "register collision"); + assert(index == noreg || offset == 0, "only one offset"); + switch (barrier) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + __ g1_write_barrier_pre( base, index, offset, tmp, /*preserve_o_regs*/true); + if (index == noreg ) { + assert(Assembler::is_simm13(offset), "fix this code"); + __ store_heap_oop(val, base, offset); + } else { + __ store_heap_oop(val, base, index); + } + + // No need for post barrier if storing NULL + if (val != G0) { + if (precise) { + if (index == noreg) { + __ add(base, offset, base); + } else { + __ add(base, index, base); + } + } + __ g1_write_barrier_post(base, val, tmp); + } + } + break; +#endif // SERIALGC + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (index == noreg ) { + assert(Assembler::is_simm13(offset), "fix this code"); + __ store_heap_oop(val, base, offset); + } else { + __ store_heap_oop(val, base, index); + } + // No need for post barrier if storing NULL + if (val != G0) { + if (precise) { + if (index == noreg) { + __ add(base, offset, base); + } else { + __ add(base, index, base); + } + } + __ card_write_barrier_post(base, val, tmp); + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + ShouldNotReachHere(); + break; + default : + ShouldNotReachHere(); + + } +} + //---------------------------------------------------------------------------------------------------- // Platform-dependent initialization @@ -758,6 +831,8 @@ // O4: array element klass // O5: value klass + // Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + // Generate a fast subtype check. Branch to store_ok if no // failure. Throw if failure. __ gen_subtype_check( O5, O4, G3_scratch, G4_scratch, G1_scratch, store_ok ); @@ -767,18 +842,14 @@ // Store is OK. __ bind(store_ok); - __ store_heap_oop(Otos_i, O1, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - // Quote from rememberedSet.hpp: For objArrays, the precise card - // corresponding to the pointer store is dirtied so we don't need to - // scavenge the entire array. 
- Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - __ add(element, O1); // address the element precisely - __ store_check(G3_scratch, O1); + do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Otos_i, G3_scratch, _bs->kind(), true); + __ ba(false,done); __ delayed()->inc(Lesp, 3* Interpreter::stackElementSize()); // adj sp (pops array, index and value) __ bind(is_null); - __ store_heap_oop(Otos_i, element); + do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), G0, G4_scratch, _bs->kind(), true); + __ profile_null_seen(G3_scratch); __ inc(Lesp, 3* Interpreter::stackElementSize()); // adj sp (pops array, index and value) __ bind(done); @@ -2449,8 +2520,9 @@ // atos __ pop_ptr(); __ verify_oop(Otos_i); - __ store_heap_oop(Otos_i, Rclass, Roffset); - __ store_check(G1_scratch, Rclass, Roffset); + + do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false); + __ ba(false, checkVolatile); __ delayed()->tst(Lscratch); @@ -2491,8 +2563,9 @@ __ pop_ptr(); pop_and_check_object(Rclass); __ verify_oop(Otos_i); - __ store_heap_oop(Otos_i, Rclass, Roffset); - __ store_check(G1_scratch, Rclass, Roffset); + + do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false); + patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch); __ ba(false, checkVolatile); __ delayed()->tst(Lscratch); @@ -2646,8 +2719,7 @@ __ stf(FloatRegisterImpl::D, Ftos_d, Rclass, Roffset); break; case Bytecodes::_fast_aputfield: - __ store_heap_oop(Otos_i, Rclass, Roffset); - __ store_check(G1_scratch, Rclass, Roffset); + do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false); break; default: ShouldNotReachHere(); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/assembler_x86_32.cpp --- a/src/cpu/x86/vm/assembler_x86_32.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/assembler_x86_32.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -3372,13 +3372,142 @@ call_VM_leaf(entry_point, 3); } - // Calls to C land // // When entering C land, the rbp, & rsp of the last Java frame have to be recorded // in the (thread-local) JavaThread object. When leaving C land, the last Java fp // has to be reset to 0. This is required to allow proper stack traversal. +////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void MacroAssembler::g1_write_barrier_pre(Register obj, + Register thread, + Register tmp, + Register tmp2, + bool tosca_live) { + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + + Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + + Label done; + Label runtime; + + // if (!marking_in_progress) goto done; + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + cmpl(in_progress, 0); + } else { + assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); + cmpb(in_progress, 0); + } + jcc(Assembler::equal, done); + + // if (x.f == NULL) goto done; + cmpl(Address(obj, 0), NULL_WORD); + jcc(Assembler::equal, done); + + // Can we store original value in the thread's buffer? 
+ + movl(tmp2, Address(obj, 0)); + cmpl(index, 0); + jcc(Assembler::equal, runtime); + subl(index, wordSize); + movl(tmp, buffer); + addl(tmp, index); + movl(Address(tmp, 0), tmp2); + jmp(done); + bind(runtime); + // save the live input values + if(tosca_live) pushl(rax); + pushl(obj); + pushl(thread); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread); + popl(thread); + popl(obj); + if(tosca_live) popl(rax); + bind(done); + +} + +void MacroAssembler::g1_write_barrier_post(Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2) { + + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + Label done; + Label runtime; + + // Does store cross heap regions? + + movl(tmp, store_addr); // ebx = edx + xorl(tmp, new_val); // ebx ^= eax + shrl(tmp, HeapRegion::LogOfHRGrainBytes); // ebx <<= 9 + jcc(Assembler::equal, done); + + // crosses regions, storing NULL? + + cmpl(new_val, NULL_WORD); + jcc(Assembler::equal, done); + + // storing region crossing non-NULL, is card already dirty? + + const Register card_index = tmp; + + movl(card_index, store_addr); // ebx = edx + shrl(card_index, CardTableModRefBS::card_shift); // ebx >>= 9 + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + ExternalAddress cardtable((address)ct->byte_map_base); + Address index(noreg, card_index, Address::times_1); + const Register card_addr = tmp; + leal(card_addr, as_Address(ArrayAddress(cardtable, index))); + cmpb(Address(card_addr, 0), 0); + jcc(Assembler::equal, done); + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. + + movb(Address(card_addr, 0), 0); + + cmpl(queue_index, 0); + jcc(Assembler::equal, runtime); + subl(queue_index, wordSize); + movl(tmp2, buffer); + addl(tmp2, queue_index); + movl(Address(tmp2, 0), card_index); + jmp(done); + + bind(runtime); + // save the live input values + pushl(store_addr); + pushl(new_val); + pushl(thread); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); + popl(thread); + popl(new_val); + popl(store_addr); + + bind(done); + + +} + +#endif // SERIALGC +////////////////////////////////////////////////////////////////////////////////// + + void MacroAssembler::store_check(Register obj) { // Does a store check for the oop in register obj. The content of // register obj is destroyed afterwards. 
@@ -4548,29 +4677,33 @@ Register t1, Label& slow_case) { assert(obj == rax, "obj must be in rax, for cmpxchg"); assert_different_registers(obj, var_size_in_bytes, t1); - Register end = t1; - Label retry; - bind(retry); - ExternalAddress heap_top((address) Universe::heap()->top_addr()); - movptr(obj, heap_top); - if (var_size_in_bytes == noreg) { - leal(end, Address(obj, con_size_in_bytes)); + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + jmp(slow_case); } else { - leal(end, Address(obj, var_size_in_bytes, Address::times_1)); + Register end = t1; + Label retry; + bind(retry); + ExternalAddress heap_top((address) Universe::heap()->top_addr()); + movptr(obj, heap_top); + if (var_size_in_bytes == noreg) { + leal(end, Address(obj, con_size_in_bytes)); + } else { + leal(end, Address(obj, var_size_in_bytes, Address::times_1)); + } + // if end < obj then we wrapped around => object too long => slow case + cmpl(end, obj); + jcc(Assembler::below, slow_case); + cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); + jcc(Assembler::above, slow_case); + // Compare obj with the top addr, and if still equal, store the new top addr in + // end at the address of the top addr pointer. Sets ZF if was equal, and clears + // it otherwise. Use lock prefix for atomicity on MPs. + if (os::is_MP()) { + lock(); + } + cmpxchgptr(end, heap_top); + jcc(Assembler::notEqual, retry); } - // if end < obj then we wrapped around => object too long => slow case - cmpl(end, obj); - jcc(Assembler::below, slow_case); - cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); - jcc(Assembler::above, slow_case); - // Compare obj with the top addr, and if still equal, store the new top addr in - // end at the address of the top addr pointer. Sets ZF if was equal, and clears - // it otherwise. Use lock prefix for atomicity on MPs. - if (os::is_MP()) { - lock(); - } - cmpxchgptr(end, heap_top); - jcc(Assembler::notEqual, retry); } diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/assembler_x86_32.hpp --- a/src/cpu/x86/vm/assembler_x86_32.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/assembler_x86_32.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -216,9 +216,11 @@ #endif // ASSERT // accessors - bool uses(Register reg) const { - return _base == reg || _index == reg; - } + bool uses(Register reg) const { return _base == reg || _index == reg; } + Register base() const { return _base; } + Register index() const { return _index; } + ScaleFactor scale() const { return _scale; } + int disp() const { return _disp; } // Convert the raw encoding form into the form expected by the constructor for // Address. An index of 4 (rsp) corresponds to having no index, so convert @@ -990,7 +992,8 @@ // on arguments should also go in here. class MacroAssembler: public Assembler { - friend class LIR_Assembler; + friend class LIR_Assembler; + friend class Runtime1; // as_Address() protected: Address as_Address(AddressLiteral adr); @@ -1151,6 +1154,10 @@ void store_check(Register obj); // store check for obj - register is destroyed afterwards void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. 
is destroyed) + void g1_write_barrier_pre(Register obj, Register thread, Register tmp, Register tmp2, bool tosca_live ); + void g1_write_barrier_post(Register store_addr, Register new_val, Register thread, Register tmp, Register tmp2); + + // split store_check(Register obj) to enhance instruction interleaving void store_check_part_1(Register obj); void store_check_part_2(Register obj); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/assembler_x86_64.cpp --- a/src/cpu/x86/vm/assembler_x86_64.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/assembler_x86_64.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -4405,6 +4405,129 @@ call_VM_leaf(entry_point, 3); } +///////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void MacroAssembler::g1_write_barrier_pre(Register obj, Register tmp, Register tmp2, bool tosca_live ) { + Address in_progress(r15_thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + + Address index(r15_thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(r15_thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + + Label done; + Label runtime; + + // if (!marking_in_progress) goto done; + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + cmpl(in_progress, 0); + } else { + assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); + cmpb(in_progress, 0); + } + jcc(Assembler::equal, done); + + // if (x.f == NULL) goto done; + cmpq(Address(obj, 0), (int)NULL_WORD); + jcc(Assembler::equal, done); + + // Can we store original value in the thread's buffer? + + movslq(tmp, index); + movq(tmp2, Address(obj, 0)); + cmpq(tmp, 0); + jcc(Assembler::equal, runtime); + subq(tmp, wordSize); + movl(index, tmp); + addq(tmp, buffer); + movq(Address(tmp, 0), tmp2); + jmp(done); + bind(runtime); + // save live inputs + if (tosca_live) pushq(rax); + pushq(obj); + movq(c_rarg0, Address(obj, 0)); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread); + popq(obj); + if (tosca_live) popq(rax); + bind(done); +} + +void MacroAssembler::g1_write_barrier_post(Register store_addr, + Register new_val, + Register tmp, + Register tmp2) { + + Address index(r15_thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(r15_thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + Label done; + Label runtime; + + // Does store cross heap regions? + + movq(tmp, store_addr); + xorq(tmp, new_val); + shrq(tmp, HeapRegion::LogOfHRGrainBytes); + jcc(Assembler::equal, done); + + // crosses regions, storing NULL? + + cmpq(new_val, (int)NULL_WORD); + jcc(Assembler::equal, done); + + // storing region crossing non-NULL, is card already dirty? + const Register card_addr = tmp; + + movq(card_addr, store_addr); + shrq(card_addr, CardTableModRefBS::card_shift); + + ExternalAddress cardtable((address) ct->byte_map_base); + lea(tmp2, cardtable); + + // get the address of the card + addq(card_addr, tmp2); + + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + cmpb(Address(card_addr, 0), 0); + jcc(Assembler::equal, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. 
+ + movb(Address(card_addr, 0), 0); + + cmpl(index, 0); + jcc(Assembler::equal, runtime); + subl(index, wordSize); + movq(tmp2, buffer); + movslq(rscratch1, index); + addq(tmp2, rscratch1); + // log the card + movq(Address(tmp2, 0), card_addr); + jmp(done); + + bind(runtime); + // save live inputs + pushq(store_addr); + pushq(new_val); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread); + popq(new_val); + popq(store_addr); + + bind(done); + + +} + +#endif // SERIALGC +///////////////////////////////////////////////////////////////////////////// // Calls to C land // @@ -4802,32 +4925,36 @@ Label& slow_case) { assert(obj == rax, "obj must be in rax for cmpxchg"); assert_different_registers(obj, var_size_in_bytes, t1); - Register end = t1; - Label retry; - bind(retry); - ExternalAddress heap_top((address) Universe::heap()->top_addr()); - movptr(obj, heap_top); - if (var_size_in_bytes == noreg) { - leaq(end, Address(obj, con_size_in_bytes)); + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + jmp(slow_case); } else { - leaq(end, Address(obj, var_size_in_bytes, Address::times_1)); + Register end = t1; + Label retry; + bind(retry); + ExternalAddress heap_top((address) Universe::heap()->top_addr()); + movptr(obj, heap_top); + if (var_size_in_bytes == noreg) { + leaq(end, Address(obj, con_size_in_bytes)); + } else { + leaq(end, Address(obj, var_size_in_bytes, Address::times_1)); + } + // if end < obj then we wrapped around => object too long => slow case + cmpq(end, obj); + jcc(Assembler::below, slow_case); + cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); + + jcc(Assembler::above, slow_case); + // Compare obj with the top addr, and if still equal, store the new + // top addr in end at the address of the top addr pointer. Sets ZF + // if was equal, and clears it otherwise. Use lock prefix for + // atomicity on MPs. + if (os::is_MP()) { + lock(); + } + cmpxchgptr(end, heap_top); + // if someone beat us on the allocation, try again, otherwise continue + jcc(Assembler::notEqual, retry); } - // if end < obj then we wrapped around => object too long => slow case - cmpq(end, obj); - jcc(Assembler::below, slow_case); - cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); - - jcc(Assembler::above, slow_case); - // Compare obj with the top addr, and if still equal, store the new - // top addr in end at the address of the top addr pointer. Sets ZF - // if was equal, and clears it otherwise. Use lock prefix for - // atomicity on MPs. - if (os::is_MP()) { - lock(); - } - cmpxchgptr(end, heap_top); - // if someone beat us on the allocation, try again, otherwise continue - jcc(Assembler::notEqual, retry); } // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/assembler_x86_64.hpp --- a/src/cpu/x86/vm/assembler_x86_64.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/assembler_x86_64.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -222,6 +222,18 @@ static Address make_raw(int base, int index, int scale, int disp); static Address make_array(ArrayAddress); + Register base() const { + return _base; + } + + Register index() const { + return _index; + } + + int disp() const { + return _disp; + } + private: bool base_needs_rex() const { @@ -1194,6 +1206,9 @@ // location (reg. 
is // destroyed) + void g1_write_barrier_pre(Register obj, Register tmp, Register tmp2, bool tosca_live ); + void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp, Register tmp2); + // split store_check(Register obj) to enhance instruction interleaving void store_check_part_1(Register obj); void store_check_part_2(Register obj); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/c1_CodeStubs_x86.cpp --- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -455,5 +455,50 @@ __ jmp(_continuation); } +///////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { + + // At this point we know that marking is in progress + + __ bind(_entry); + assert(pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = pre_val()->as_register(); + + ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false); + + __ cmpl(pre_val_reg, NULL_WORD); + __ jcc(Assembler::equal, _continuation); + ce->store_parameter(pre_val()->as_register(), 0); + __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id))); + __ jmp(_continuation); + +} + +jbyte* G1PostBarrierStub::_byte_map_base = NULL; + +jbyte* G1PostBarrierStub::byte_map_base_slow() { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->is_a(BarrierSet::G1SATBCTLogging), + "Must be if we're using this."); + return ((G1SATBCardTableModRefBS*)bs)->byte_map_base; +} + +void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + assert(addr()->is_register(), "Precondition."); + assert(new_val()->is_register(), "Precondition."); + Register new_val_reg = new_val()->as_register(); + __ cmpl(new_val_reg, NULL_WORD); + __ jcc(Assembler::equal, _continuation); + ce->store_parameter(addr()->as_register(), 0); + __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id))); + __ jmp(_continuation); +} + +#endif // SERIALGC +///////////////////////////////////////////////////////////////////////////// #undef __ diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/c1_LIRGenerator_x86.cpp --- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -294,6 +294,8 @@ } if (obj_store) { + // Needs GC write barriers. + pre_barrier(LIR_OprFact::address(array_addr), false, NULL); __ move(value.result(), array_addr, null_check_info); // Seems to be a precise post_barrier(LIR_OprFact::address(array_addr), value.result()); @@ -745,7 +747,10 @@ __ move(obj.result(), addr); __ add(addr, offset.result(), addr); - + if (type == objectType) { // Write-barrier needed for Object fields. + // Do the pre-write barrier, if any. + pre_barrier(addr, false, NULL); + } LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience if (type == objectType) @@ -1250,6 +1255,8 @@ LIR_Address* addr = new LIR_Address(src, offset, type); bool is_obj = (type == T_ARRAY || type == T_OBJECT); if (is_obj) { + // Do the pre-write barrier, if any. 
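Stepping back to the eden_allocate changes earlier in the patch (assembler_x86_32.cpp and assembler_x86_64.cpp): the inline fast path is now skipped entirely when the heap cannot support contiguous inline allocation, and otherwise keeps spinning on a compare-and-swap of the shared heap top. A rough C++ sketch of that bump-the-pointer protocol, with std::atomic standing in for the hand-emitted lock cmpxchg and illustrative globals for top/end:

#include <atomic>
#include <cstddef>
#include <cstdint>

// Illustrative shared allocation window: [heap_top, heap_end) is unallocated.
static std::atomic<uintptr_t> heap_top;   // cf. Universe::heap()->top_addr()
static uintptr_t              heap_end;   // cf. Universe::heap()->end_addr()

// Returns a freshly allocated block, or nullptr for the slow path (the code
// above jumps to slow_case instead).
void* inline_allocate(size_t size_in_bytes) {
  uintptr_t obj = heap_top.load(std::memory_order_relaxed);
  for (;;) {
    uintptr_t new_top = obj + size_in_bytes;
    if (new_top < obj || new_top > heap_end) {
      return nullptr;                 // wrapped around or past the end of the heap
    }
    // Publish the new top only if nobody raced us; on failure 'obj' is
    // reloaded with the current top and we retry, like the jcc back to retry.
    if (heap_top.compare_exchange_weak(obj, new_top)) {
      return reinterpret_cast<void*>(obj);
    }
  }
}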
+ pre_barrier(LIR_OprFact::address(addr), false, NULL); __ move(data, addr); assert(src->is_register(), "must be register"); // Seems to be a precise address diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/c1_Runtime1_x86.cpp --- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1385,6 +1385,136 @@ } break; +#ifndef SERIALGC + case g1_pre_barrier_slow_id: + { + StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments); + // arg0 : previous value of memory + + BarrierSet* bs = Universe::heap()->barrier_set(); + if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ movl(rax, (int)id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), rax); + __ should_not_reach_here(); + break; + } + + __ pushl(rax); + __ pushl(rdx); + + const Register pre_val = rax; + const Register thread = rax; + const Register tmp = rdx; + + __ get_thread(thread); + + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + + Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + + Label done; + Label runtime; + + // Can we store original value in the thread's buffer? + + __ cmpl(queue_index, 0); + __ jcc(Assembler::equal, runtime); + __ subl(queue_index, wordSize); + __ movl(tmp, buffer); + __ addl(tmp, queue_index); + // prev_val (rax) + f.load_argument(0, pre_val); + __ movl(Address(tmp, 0), pre_val); + __ jmp(done); + + __ bind(runtime); + // load the pre-value + __ pushl(rcx); + f.load_argument(0, rcx); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), rcx, thread); + __ popl(rcx); + + __ bind(done); + __ popl(rdx); + __ popl(rax); + } + break; + + case g1_post_barrier_slow_id: + { + StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments); + + + // arg0: store_address + Address store_addr(rbp, 2*BytesPerWord); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + Label done; + Label runtime; + + // At this point we know new_value is non-NULL and the new_value crosses regsion. + // Must check to see if card is already dirty + + const Register card_index = rdx; + + const Register thread = rax; + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + __ pushl(rax); + __ pushl(rdx); + + __ movl(card_index, store_addr); + __ get_thread(rax); + __ shrl(card_index, CardTableModRefBS::card_shift); + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + ExternalAddress cardtable((address)ct->byte_map_base); + Address index(noreg, card_index, Address::times_1); + const Register card_addr = rdx; + __ leal(card_addr, __ as_Address(ArrayAddress(cardtable, index))); + __ cmpb(Address(card_addr, 0), 0); + __ jcc(Assembler::equal, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. 
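Taken together, MacroAssembler::g1_write_barrier_post earlier in the patch and the g1_post_barrier_slow stub below implement G1's post-write barrier: a store of a non-NULL value that crosses heap regions dirties the card for the store address and logs that card in the thread's dirty card queue. A compact C++ sketch of the full sequence, with illustrative constants and the runtime call stubbed out:

#include <cstddef>
#include <cstdint>

const unsigned kLogOfHRGrainBytes = 20;  // region size, 1 MB assumed here
const unsigned kCardShift         = 9;   // CardTableModRefBS::card_shift

struct DirtyCardQueue {                  // stand-in for the thread's PtrQueue
  size_t index;                          // byte offset of the next free slot
  void** buffer;
};

inline void g1_wb_post_slow(signed char* /*card*/, DirtyCardQueue* /*q*/) {
  // Runtime fallback (SharedRuntime::g1_wb_post in the patch); elided here.
}

inline void g1_write_barrier_post(void* store_addr, void* new_val,
                                  signed char* card_table_base,  // jbyte* map
                                  DirtyCardQueue* q) {
  uintptr_t a = reinterpret_cast<uintptr_t>(store_addr);
  uintptr_t v = reinterpret_cast<uintptr_t>(new_val);
  if (((a ^ v) >> kLogOfHRGrainBytes) == 0) return;  // same region: no work
  if (new_val == nullptr) return;                    // storing NULL: no work
  signed char* card = card_table_base + (a >> kCardShift);
  if (*card == 0) return;          // already dirty (0 is the dirty value here)
  *card = 0;                       // dirty the card
  if (q->index == 0) {             // queue full: hand the card to the runtime
    g1_wb_post_slow(card, q);
    return;
  }
  q->index -= sizeof(void*);       // log the card in the thread-local buffer
  *reinterpret_cast<void**>(
      reinterpret_cast<char*>(q->buffer) + q->index) = card;
}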
+ + __ movb(Address(card_addr, 0), 0); + + __ cmpl(queue_index, 0); + __ jcc(Assembler::equal, runtime); + __ subl(queue_index, wordSize); + + const Register buffer_addr = rbx; + __ pushl(rbx); + + __ movl(buffer_addr, buffer); + __ addl(buffer_addr, queue_index); + __ movl(Address(buffer_addr, 0), card_addr); + __ popl(rbx); + __ jmp(done); + + __ bind(runtime); + __ pushl(rcx); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); + __ popl(rcx); + + __ bind(done); + __ popl(rdx); + __ popl(rax); + + } + break; +#endif // !SERIALGC + default: { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); __ movl(rax, (int)id); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/interp_masm_x86_64.cpp --- a/src/cpu/x86/vm/interp_masm_x86_64.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -35,8 +35,13 @@ // Note: No need to save/restore bcp & locals (r13 & r14) pointer // since these are callee saved registers and no blocking/ // GC can happen in leaf calls. + // Further Note: DO NOT save/restore bcp/locals. If a caller has + // already saved them so that it can use esi/edi as temporaries + // then a save/restore here will DESTROY the copy the caller + // saved! There used to be a save_bcp() that only happened in + // the ASSERT path (no restore_bcp). Which caused bizarre failures + // when jvm built with ASSERTs. #ifdef ASSERT - save_bcp(); { Label L; cmpq(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int)NULL_WORD); @@ -49,24 +54,9 @@ // super call MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); // interpreter specific -#ifdef ASSERT - { - Label L; - cmpq(r13, Address(rbp, frame::interpreter_frame_bcx_offset * wordSize)); - jcc(Assembler::equal, L); - stop("InterpreterMacroAssembler::call_VM_leaf_base:" - " r13 not callee saved?"); - bind(L); - } - { - Label L; - cmpq(r14, Address(rbp, frame::interpreter_frame_locals_offset * wordSize)); - jcc(Assembler::equal, L); - stop("InterpreterMacroAssembler::call_VM_leaf_base:" - " r14 not callee saved?"); - bind(L); - } -#endif + // Used to ASSERT that r13/r14 were equal to frame's bcp/locals + // but since they may not have been saved (and we don't want to + // save thme here (see note above) the assert is invalid. 
} void InterpreterMacroAssembler::call_VM_base(Register oop_result, diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/stubGenerator_x86_32.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -711,7 +711,6 @@ // end - element count void gen_write_ref_array_pre_barrier(Register start, Register count) { assert_different_registers(start, count); -#if 0 // G1 only BarrierSet* bs = Universe::heap()->barrier_set(); switch (bs->kind()) { case BarrierSet::G1SATBCT: @@ -720,8 +719,8 @@ __ pushad(); // push registers __ pushl(count); __ pushl(start); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)); - __ addl(esp, wordSize * 2); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre))); + __ addl(rsp, wordSize * 2); __ popad(); } break; @@ -733,7 +732,6 @@ ShouldNotReachHere(); } -#endif // 0 - G1 only } @@ -749,20 +747,18 @@ BarrierSet* bs = Universe::heap()->barrier_set(); assert_different_registers(start, count); switch (bs->kind()) { -#if 0 // G1 only case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: { __ pushad(); // push registers __ pushl(count); __ pushl(start); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)); - __ addl(esp, wordSize * 2); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post))); + __ addl(rsp, wordSize * 2); __ popad(); } break; -#endif // 0 G1 only case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: @@ -1377,9 +1373,9 @@ Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes()); // Copy from low to high addresses, indexed from the end of each array. + gen_write_ref_array_pre_barrier(to, count); __ leal(end_from, end_from_addr); __ leal(end_to, end_to_addr); - gen_write_ref_array_pre_barrier(to, count); assert(length == count, ""); // else fix next line: __ negl(count); // negate and test the length __ jccb(Assembler::notZero, L_load_element); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1152,18 +1152,26 @@ // Destroy no registers! // void gen_write_ref_array_pre_barrier(Register addr, Register count) { -#if 0 // G1 - only - assert_different_registers(addr, c_rarg1); - assert_different_registers(count, c_rarg0); BarrierSet* bs = Universe::heap()->barrier_set(); switch (bs->kind()) { case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: { __ pushaq(); // push registers - __ movq(c_rarg0, addr); - __ movq(c_rarg1, count); - __ call(RuntimeAddress(BarrierSet::static_write_ref_array_pre)); + if (count == c_rarg0) { + if (addr == c_rarg1) { + // exactly backwards!! 
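The register shuffle above is only about avoiding clobbers: (addr, count) must end up in (c_rarg0, c_rarg1), and if the inputs happen to sit exactly crossed in those registers a pair of plain moves would overwrite one value before it is read, hence the xchgq. The same hazard reduced to plain C++, with references standing in for registers (illustrative only):

#include <utility>

// Move the logical values (addr, count) into the argument slots arg0/arg1
// without losing either, even when the sources alias the destinations.
void marshal_args(long& arg0, long& arg1, long& addr, long& count) {
  if (&count == &arg0) {
    if (&addr == &arg1) {
      std::swap(arg0, arg1);   // exactly backwards: swapping is the only safe order
    } else {
      arg1 = count;            // read count out of arg0 before overwriting it
      arg0 = addr;
    }
  } else {
    arg0 = addr;               // safe: count does not live in arg0
    arg1 = count;
  }
}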
+ __ xchgq(c_rarg1, c_rarg0); + } else { + __ movq(c_rarg1, count); + __ movq(c_rarg0, addr); + } + + } else { + __ movq(c_rarg0, addr); + __ movq(c_rarg1, count); + } + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre))); __ popaq(); } break; @@ -1171,11 +1179,10 @@ case BarrierSet::CardTableExtension: case BarrierSet::ModRef: break; - default : + default: ShouldNotReachHere(); } -#endif // 0 G1 - only } // @@ -1192,7 +1199,6 @@ assert_different_registers(start, end, scratch); BarrierSet* bs = Universe::heap()->barrier_set(); switch (bs->kind()) { -#if 0 // G1 - only case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: @@ -1205,11 +1211,10 @@ __ shrq(scratch, LogBytesPerWord); __ movq(c_rarg0, start); __ movq(c_rarg1, scratch); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post))); __ popaq(); } break; -#endif // 0 G1 - only case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: { @@ -1230,8 +1235,12 @@ __ decrementq(count); __ jcc(Assembler::greaterEqual, L_loop); } - } - } + break; + default: + ShouldNotReachHere(); + + } + } // Copy big chunks forward // diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/templateTable_x86_32.cpp --- a/src/cpu/x86/vm/templateTable_x86_32.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/templateTable_x86_32.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -107,6 +107,78 @@ //---------------------------------------------------------------------------------------------------- // Miscelaneous helper routines +// Store an oop (or NULL) at the address described by obj. +// If val == noreg this means store a NULL + +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address obj, + Register val, + BarrierSet::Name barrier, + bool precise) { + assert(val == noreg || val == rax, "parameter is just for looks"); + switch (barrier) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + // flatten object address if needed + // We do it regardless of precise because we need the registers + if (obj.index() == noreg && obj.disp() == 0) { + if (obj.base() != rdx) { + __ movl(rdx, obj.base()); + } + } else { + __ leal(rdx, obj); + } + __ get_thread(rcx); + __ save_bcp(); + __ g1_write_barrier_pre(rdx, rcx, rsi, rbx, val != noreg); + + // Do the actual store + // noreg means NULL + if (val == noreg) { + __ movl(Address(rdx, 0), NULL_WORD); + // No post barrier for NULL + } else { + __ movl(Address(rdx, 0), val); + __ g1_write_barrier_post(rdx, rax, rcx, rbx, rsi); + } + __ restore_bcp(); + + } + break; +#endif // SERIALGC + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (val == noreg) { + __ movl(obj, NULL_WORD); + } else { + __ movl(obj, val); + // flatten object address if needed + if (!precise || (obj.index() == noreg && obj.disp() == 0)) { + __ store_check(obj.base()); + } else { + __ leal(rdx, obj); + __ store_check(rdx); + } + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + if (val == noreg) { + __ movl(obj, NULL_WORD); + } else { + __ movl(obj, val); + } + break; + default : + ShouldNotReachHere(); + + } +} + Address TemplateTable::at_bcp(int offset) { assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); return Address(rsi, offset); @@ -872,6 +944,8 @@ __ movl(rax, at_tos()); // Value __ movl(rcx, at_tos_p1()); // Index __ movl(rdx, at_tos_p2()); // Array + + 
Address element_address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); index_check_without_pop(rdx, rcx); // kills rbx, // do array store check - check for NULL value first __ testl(rax, rax); @@ -883,7 +957,7 @@ __ movl(rax, Address(rdx, oopDesc::klass_offset_in_bytes())); __ movl(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes())); // Compress array+index*4+12 into a single register. Frees ECX. - __ leal(rdx, Address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + __ leal(rdx, element_address); // Generate subtype check. Blows ECX. Resets EDI to locals. // Superklass in EAX. Subklass in EBX. @@ -895,15 +969,20 @@ // Come here on success __ bind(ok_is_subtype); - __ movl(rax, at_rsp()); // Value - __ movl(Address(rdx, 0), rax); - __ store_check(rdx); - __ jmpb(done); + + // Get the value to store + __ movl(rax, at_rsp()); + // and store it with appropriate barrier + do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true); + + __ jmp(done); // Have a NULL in EAX, EDX=array, ECX=index. Store NULL at ary[idx] __ bind(is_null); __ profile_null_seen(rbx); - __ movl(Address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), rax); + + // Store NULL, (noreg means NULL to do_oop_store) + do_oop_store(_masm, element_address, noreg, _bs->kind(), true); // Pop stack arguments __ bind(done); @@ -1506,7 +1585,7 @@ // compute return address as bci in rax, __ leal(rax, at_bcp((is_wide ? 5 : 3) - in_bytes(constMethodOopDesc::codes_offset()))); __ subl(rax, Address(rcx, methodOopDesc::const_offset())); - // Adjust the bcp in ESI by the displacement in EDX + // Adjust the bcp in rsi by the displacement in EDX __ addl(rsi, rdx); // Push return address __ push_i(rax); @@ -1517,7 +1596,7 @@ // Normal (non-jsr) branch handling - // Adjust the bcp in ESI by the displacement in EDX + // Adjust the bcp in rsi by the displacement in EDX __ addl(rsi, rdx); assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); @@ -2426,11 +2505,12 @@ __ pop(atos); if (!is_static) pop_and_check_object(obj); - __ movl(lo, rax ); - __ store_check(obj, lo); // Need to mark card + do_oop_store(_masm, lo, rax, _bs->kind(), false); + if (!is_static) { patch_bytecode(Bytecodes::_fast_aputfield, rcx, rbx); } + __ jmp(Done); __ bind(notObj); @@ -2638,14 +2718,18 @@ case Bytecodes::_fast_lputfield: __ movl(hi, rdx); __ movl(lo, rax); break; case Bytecodes::_fast_fputfield: __ fstp_s(lo); break; case Bytecodes::_fast_dputfield: __ fstp_d(lo); break; - case Bytecodes::_fast_aputfield: __ movl(lo, rax); __ store_check(rcx, lo); break; + case Bytecodes::_fast_aputfield: { + do_oop_store(_masm, lo, rax, _bs->kind(), false); + break; + } default: ShouldNotReachHere(); } Label done; volatile_barrier( ); - __ jmpb(done); + // Barriers are so large that short branch doesn't reach! + __ jmp(done); // Same code as above, but don't need rdx to test for volatile. 
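do_oop_store's precise flag mirrors the two card-marking policies already in use: aastore passes precise == true and marks the card of the exact element address, while putfield passes precise == false, since for field stores dirtying the card that covers the object header is sufficient; that is also why the address only has to be flattened in the precise case. A small worked example of the difference, assuming 512-byte cards:

#include <cstdint>
#include <cstdio>

const unsigned kCardShift = 9;   // 512-byte cards (CardTableModRefBS::card_shift)

int main() {
  uintptr_t obj   = 0x10000;     // object header address (illustrative)
  uintptr_t field = obj + 600;   // an element/field inside the object

  // Imprecise marking dirties the card of the object start; precise marking
  // dirties the card of the exact store location. They differ whenever the
  // object spans a card boundary, as it does here.
  std::printf("imprecise card: %lu\n", (unsigned long)(obj   >> kCardShift));  // 128
  std::printf("precise   card: %lu\n", (unsigned long)(field >> kCardShift));  // 129
}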
__ bind(notVolatile); @@ -2664,7 +2748,10 @@ case Bytecodes::_fast_lputfield: __ movl(hi, rdx); __ movl(lo, rax); break; case Bytecodes::_fast_fputfield: __ fstp_s(lo); break; case Bytecodes::_fast_dputfield: __ fstp_d(lo); break; - case Bytecodes::_fast_aputfield: __ movl(lo, rax); __ store_check(rcx, lo); break; + case Bytecodes::_fast_aputfield: { + do_oop_store(_masm, lo, rax, _bs->kind(), false); + break; + } default: ShouldNotReachHere(); } @@ -3019,8 +3106,6 @@ Label initialize_object; // including clearing the fields Label allocate_shared; - ExternalAddress heap_top((address)Universe::heap()->top_addr()); - __ get_cpool_and_tags(rcx, rax); // get instanceKlass __ movl(rcx, Address(rcx, rdx, Address::times_4, sizeof(constantPoolOopDesc))); @@ -3077,6 +3162,8 @@ if (allow_shared_alloc) { __ bind(allocate_shared); + ExternalAddress heap_top((address)Universe::heap()->top_addr()); + Label retry; __ bind(retry); __ mov32(rax, heap_top); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/templateTable_x86_64.cpp --- a/src/cpu/x86/vm/templateTable_x86_64.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/templateTable_x86_64.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -113,6 +113,69 @@ // Miscelaneous helper routines +// Store an oop (or NULL) at the address described by obj. +// If val == noreg this means store a NULL + +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address obj, + Register val, + BarrierSet::Name barrier, + bool precise) { + assert(val == noreg || val == rax, "parameter is just for looks"); + switch (barrier) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + // flatten object address if needed + if (obj.index() == noreg && obj.disp() == 0) { + if (obj.base() != rdx) { + __ movq(rdx, obj.base()); + } + } else { + __ leaq(rdx, obj); + } + __ g1_write_barrier_pre(rdx, r8, rbx, val != noreg); + if (val == noreg) { + __ store_heap_oop(Address(rdx, 0), NULL_WORD); + } else { + __ store_heap_oop(Address(rdx, 0), val); + __ g1_write_barrier_post(rdx, val, r8, rbx); + } + + } + break; +#endif // SERIALGC + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (val == noreg) { + __ store_heap_oop(obj, NULL_WORD); + } else { + __ store_heap_oop(obj, val); + // flatten object address if needed + if (!precise || (obj.index() == noreg && obj.disp() == 0)) { + __ store_check(obj.base()); + } else { + __ leaq(rdx, obj); + __ store_check(rdx); + } + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + if (val == noreg) { + __ store_heap_oop(obj, NULL_WORD); + } else { + __ store_heap_oop(obj, val); + } + break; + default : + ShouldNotReachHere(); + + } +} Address TemplateTable::at_bcp(int offset) { assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); @@ -558,8 +621,8 @@ // rdx: array index_check(rdx, rax); // kills rbx __ load_heap_oop(rax, Address(rdx, rax, - UseCompressedOops ? Address::times_4 : Address::times_8, - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + UseCompressedOops ? Address::times_4 : Address::times_8, + arrayOopDesc::base_offset_in_bytes(T_OBJECT))); } void TemplateTable::baload() { @@ -864,6 +927,11 @@ __ movq(rax, at_tos()); // value __ movl(rcx, at_tos_p1()); // index __ movq(rdx, at_tos_p2()); // array + + Address element_address(rdx, rcx, + UseCompressedOops? 
Address::times_4 : Address::times_8, + arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + index_check(rdx, rcx); // kills rbx // do array store check - check for NULL value first __ testq(rax, rax); @@ -877,9 +945,7 @@ sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes())); // Compress array + index*oopSize + 12 into a single register. Frees rcx. - __ leaq(rdx, Address(rdx, rcx, - UseCompressedOops ? Address::times_4 : Address::times_8, - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + __ leaq(rdx, element_address); // Generate subtype check. Blows rcx, rdi // Superklass in rax. Subklass in rbx. @@ -891,18 +957,20 @@ // Come here on success __ bind(ok_is_subtype); - __ movq(rax, at_tos()); // Value - __ store_heap_oop(Address(rdx, 0), rax); - __ store_check(rdx); + + // Get the value we will store + __ movq(rax, at_tos()); + + // Now store using the appropriate barrier + do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true); __ jmp(done); // Have a NULL in rax, rdx=array, ecx=index. Store NULL at ary[idx] __ bind(is_null); __ profile_null_seen(rbx); - __ store_heap_oop(Address(rdx, rcx, - UseCompressedOops ? Address::times_4 : Address::times_8, - arrayOopDesc::base_offset_in_bytes(T_OBJECT)), - rax); + + // Store a NULL + do_oop_store(_masm, element_address, noreg, _bs->kind(), true); // Pop stack arguments __ bind(done); @@ -2394,8 +2462,10 @@ // atos __ pop(atos); if (!is_static) pop_and_check_object(obj); - __ store_heap_oop(field, rax); - __ store_check(obj, field); // Need to mark card + + // Store into the field + do_oop_store(_masm, field, rax, _bs->kind(), false); + if (!is_static) { patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx); } @@ -2582,8 +2652,7 @@ // access field switch (bytecode()) { case Bytecodes::_fast_aputfield: - __ store_heap_oop(field, rax); - __ store_check(rcx, field); + do_oop_store(_masm, field, rax, _bs->kind(), false); break; case Bytecodes::_fast_lputfield: __ movq(field, rax); @@ -2789,7 +2858,7 @@ __ andl(recv, 0xFF); if (TaggedStackInterpreter) __ shll(recv, 1); // index*2 __ movq(recv, Address(rsp, recv, Address::times_8, - -Interpreter::expr_offset_in_bytes(1))); + -Interpreter::expr_offset_in_bytes(1))); __ verify_oop(recv); } @@ -3042,8 +3111,6 @@ Label initialize_header; Label initialize_object; // including clearing the fields Label allocate_shared; - ExternalAddress top((address)Universe::heap()->top_addr()); - ExternalAddress end((address)Universe::heap()->end_addr()); __ get_cpool_and_tags(rsi, rax); // get instanceKlass @@ -3104,6 +3171,9 @@ if (allow_shared_alloc) { __ bind(allocate_shared); + ExternalAddress top((address)Universe::heap()->top_addr()); + ExternalAddress end((address)Universe::heap()->end_addr()); + const Register RtopAddr = rscratch1; const Register RendAddr = rscratch2; diff -r 0b27f3512f9e -r 37f87013dfd8 src/os/linux/vm/os_linux.cpp --- a/src/os/linux/vm/os_linux.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/os/linux/vm/os_linux.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1261,6 +1261,17 @@ return (1000 * 1000); } +// For now, we say that linux does not support vtime. I have no idea +// whether it can actually be made to (DLD, 9/13/05). 
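The vtime hooks being added here are deliberately conservative: Linux and Windows report supports_vtime() == false and let elapsedVTime() fall back to wall-clock elapsedTime(), while Solaris enables per-LWP microstate accounting through /proc and reads gethrvtime(). Purely as an illustration of what a Linux implementation could look like (this is not what the patch does), per-thread CPU time is available from clock_gettime:

#include <time.h>

// Hypothetical Linux sketch, not part of the patch: CLOCK_THREAD_CPUTIME_ID
// reports the CPU time consumed by the calling thread.
double elapsed_thread_cpu_seconds() {
  struct timespec ts;
  if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) {
    return 0.0;   // unavailable; a caller would fall back to elapsed wall time
  }
  return (double)ts.tv_sec + (double)ts.tv_nsec / 1e9;
}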
+ +bool os::supports_vtime() { return false; } +bool os::enable_vtime() { return false; } +bool os::vtime_enabled() { return false; } +double os::elapsedVTime() { + // better than nothing, but not much + return elapsedTime(); +} + jlong os::javaTimeMillis() { timeval time; int status = gettimeofday(&time, NULL); diff -r 0b27f3512f9e -r 37f87013dfd8 src/os/solaris/vm/os_solaris.cpp --- a/src/os/solaris/vm/os_solaris.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/os/solaris/vm/os_solaris.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1691,6 +1691,40 @@ } } +bool os::supports_vtime() { return true; } + +bool os::enable_vtime() { + int fd = open("/proc/self/ctl", O_WRONLY); + if (fd == -1) + return false; + + long cmd[] = { PCSET, PR_MSACCT }; + int res = write(fd, cmd, sizeof(long) * 2); + close(fd); + if (res != sizeof(long) * 2) + return false; + + return true; +} + +bool os::vtime_enabled() { + int fd = open("/proc/self/status", O_RDONLY); + if (fd == -1) + return false; + + pstatus_t status; + int res = read(fd, (void*) &status, sizeof(pstatus_t)); + close(fd); + if (res != sizeof(pstatus_t)) + return false; + + return status.pr_flags & PR_MSACCT; +} + +double os::elapsedVTime() { + return (double)gethrvtime() / (double)hrtime_hz; +} + // Used internally for comparisons only // getTimeMillis guaranteed to not move backwards on Solaris jlong getTimeMillis() { @@ -2661,7 +2695,7 @@ return bottom; } -// Detect the topology change. Typically happens during CPU pluggin-unplugging. +// Detect the topology change. Typically happens during CPU plugging-unplugging. bool os::numa_topology_changed() { int is_stale = Solaris::lgrp_cookie_stale(Solaris::lgrp_cookie()); if (is_stale != -1 && is_stale) { diff -r 0b27f3512f9e -r 37f87013dfd8 src/os/windows/vm/os_windows.cpp --- a/src/os/windows/vm/os_windows.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/os/windows/vm/os_windows.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -737,6 +737,17 @@ return result; } +// For now, we say that Windows does not support vtime. I have no idea +// whether it can actually be made to (DLD, 9/13/05). + +bool os::supports_vtime() { return false; } +bool os::enable_vtime() { return false; } +bool os::vtime_enabled() { return false; } +double os::elapsedVTime() { + // better than nothing, but not much + return elapsedTime(); +} + jlong os::javaTimeMillis() { if (UseFakeTimers) { return fake_time++; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/adlc/formssel.cpp --- a/src/share/vm/adlc/formssel.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/adlc/formssel.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -3767,6 +3767,10 @@ int MatchRule::is_ideal_copy() const { if( _rChild ) { const char *opType = _rChild->_opType; +#if 1 + if( strcmp(opType,"CastIP")==0 ) + return 1; +#else if( strcmp(opType,"CastII")==0 ) return 1; // Do not treat *CastPP this way, because it @@ -3786,6 +3790,7 @@ // return 1; //if( strcmp(opType,"CastP2X")==0 ) // return 1; +#endif } if( is_chain_rule(_AD.globalNames()) && _lChild && strncmp(_lChild->_opType,"stackSlot",9)==0 ) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/c1/c1_CodeStubs.hpp --- a/src/share/vm/c1/c1_CodeStubs.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/c1/c1_CodeStubs.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -482,3 +482,81 @@ virtual void print_name(outputStream* out) const { out->print("ArrayCopyStub"); } #endif // PRODUCT }; + +////////////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +// Code stubs for Garbage-First barriers. 
+class G1PreBarrierStub: public CodeStub { + private: + LIR_Opr _addr; + LIR_Opr _pre_val; + LIR_PatchCode _patch_code; + CodeEmitInfo* _info; + + public: + // pre_val (a temporary register) must be a register; + // addr (the address of the field to be read) must be a LIR_Address + G1PreBarrierStub(LIR_Opr addr, LIR_Opr pre_val, LIR_PatchCode patch_code, CodeEmitInfo* info) : + _addr(addr), _pre_val(pre_val), _patch_code(patch_code), _info(info) + { + assert(_pre_val->is_register(), "should be temporary register"); + assert(_addr->is_address(), "should be the address of the field"); + } + + LIR_Opr addr() const { return _addr; } + LIR_Opr pre_val() const { return _pre_val; } + LIR_PatchCode patch_code() const { return _patch_code; } + CodeEmitInfo* info() const { return _info; } + + virtual void emit_code(LIR_Assembler* e); + virtual void visit(LIR_OpVisitState* visitor) { + // don't pass in the code emit info since it's processed in the fast + // path + if (_info != NULL) + visitor->do_slow_case(_info); + else + visitor->do_slow_case(); + visitor->do_input(_addr); + visitor->do_temp(_pre_val); + } +#ifndef PRODUCT + virtual void print_name(outputStream* out) const { out->print("G1PreBarrierStub"); } +#endif // PRODUCT +}; + +class G1PostBarrierStub: public CodeStub { + private: + LIR_Opr _addr; + LIR_Opr _new_val; + + static jbyte* _byte_map_base; + static jbyte* byte_map_base_slow(); + static jbyte* byte_map_base() { + if (_byte_map_base == NULL) { + _byte_map_base = byte_map_base_slow(); + } + return _byte_map_base; + } + + public: + // addr (the address of the object head) and new_val must be registers. + G1PostBarrierStub(LIR_Opr addr, LIR_Opr new_val): _addr(addr), _new_val(new_val) { } + + LIR_Opr addr() const { return _addr; } + LIR_Opr new_val() const { return _new_val; } + + virtual void emit_code(LIR_Assembler* e); + virtual void visit(LIR_OpVisitState* visitor) { + // don't pass in the code emit info since it's processed in the fast path + visitor->do_slow_case(); + visitor->do_input(_addr); + visitor->do_input(_new_val); + } +#ifndef PRODUCT + virtual void print_name(outputStream* out) const { out->print("G1PostBarrierStub"); } +#endif // PRODUCT +}; + +#endif // SERIALGC +////////////////////////////////////////////////////////////////////////////////////////// diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/c1/c1_LIRAssembler.cpp --- a/src/share/vm/c1/c1_LIRAssembler.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/c1/c1_LIRAssembler.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -74,6 +74,7 @@ LIR_Assembler::LIR_Assembler(Compilation* c): _compilation(c) , _masm(c->masm()) + , _bs(Universe::heap()->barrier_set()) , _frame_map(c->frame_map()) , _current_block(NULL) , _pending_non_safepoint(NULL) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/c1/c1_LIRAssembler.hpp --- a/src/share/vm/c1/c1_LIRAssembler.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/c1/c1_LIRAssembler.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -24,11 +24,13 @@ class Compilation; class ScopeValue; +class BarrierSet; class LIR_Assembler: public CompilationResourceObj { private: C1_MacroAssembler* _masm; CodeStubList* _slow_case_stubs; + BarrierSet* _bs; Compilation* _compilation; FrameMap* _frame_map; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/c1/c1_LIRGenerator.cpp --- a/src/share/vm/c1/c1_LIRGenerator.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/c1/c1_LIRGenerator.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -285,16 +285,7 @@ void LIRGenerator::init() { - BarrierSet* bs = 
Universe::heap()->barrier_set(); - assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); - CardTableModRefBS* ct = (CardTableModRefBS*)bs; - assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); - -#ifdef _LP64 - _card_table_base = new LIR_Const((jlong)ct->byte_map_base); -#else - _card_table_base = new LIR_Const((jint)ct->byte_map_base); -#endif + _bs = Universe::heap()->barrier_set(); } @@ -1239,8 +1230,37 @@ // Various barriers +void LIRGenerator::pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info) { + // Do the pre-write barrier, if any. + switch (_bs->kind()) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + G1SATBCardTableModRef_pre_barrier(addr_opr, patch, info); + break; +#endif // SERIALGC + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + // No pre barriers + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + // No pre barriers + break; + default : + ShouldNotReachHere(); + + } +} + void LIRGenerator::post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) { - switch (Universe::heap()->barrier_set()->kind()) { + switch (_bs->kind()) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + G1SATBCardTableModRef_post_barrier(addr, new_val); + break; +#endif // SERIALGC case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: CardTableModRef_post_barrier(addr, new_val); @@ -1254,11 +1274,120 @@ } } +//////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info) { + if (G1DisablePreBarrier) return; + + // First we test whether marking is in progress. + BasicType flag_type; + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + flag_type = T_INT; + } else { + guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, + "Assumption"); + flag_type = T_BYTE; + } + LIR_Opr thrd = getThreadPointer(); + LIR_Address* mark_active_flag_addr = + new LIR_Address(thrd, + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active()), + flag_type); + // Read the marking-in-progress flag. + LIR_Opr flag_val = new_register(T_INT); + __ load(mark_active_flag_addr, flag_val); + + LabelObj* start_store = new LabelObj(); + + LIR_PatchCode pre_val_patch_code = + patch ? lir_patch_normal : lir_patch_none; + + LIR_Opr pre_val = new_register(T_OBJECT); + + __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + if (!addr_opr->is_address()) { + assert(addr_opr->is_register(), "must be"); + addr_opr = LIR_OprFact::address(new LIR_Address(addr_opr, 0, T_OBJECT)); + } + CodeStub* slow = new G1PreBarrierStub(addr_opr, pre_val, pre_val_patch_code, + info); + __ branch(lir_cond_notEqual, T_INT, slow); + __ branch_destination(slow->continuation()); +} + +void LIRGenerator::G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) { + if (G1DisablePostBarrier) return; + + // If the "new_val" is a constant NULL, no barrier is necessary. 
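Both the assembly barriers earlier in the patch and the LIR sequence generated just below rely on the same region-crossing test: heap regions are 2^LogOfHRGrainBytes bytes and aligned, so two addresses fall in the same region exactly when xor-ing them and shifting right by that amount gives zero. A tiny worked example, assuming 1 MB regions:

#include <cstdint>
#include <cstdio>

const unsigned kLogOfHRGrainBytes = 20;   // 1 MB regions assumed for the example

bool same_region(uintptr_t a, uintptr_t b) {
  return ((a ^ b) >> kLogOfHRGrainBytes) == 0;
}

int main() {
  uintptr_t field   = 0x40012345;   // address being stored into
  uintptr_t val_in  = 0x400ffff0;   // new value inside the same 1 MB region
  uintptr_t val_out = 0x40112345;   // new value one region higher

  std::printf("%d %d\n", same_region(field, val_in),    // 1: post barrier skipped
                         same_region(field, val_out));  // 0: card must be dirtied
}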
+ if (new_val->is_constant() && + new_val->as_constant_ptr()->as_jobject() == NULL) return; + + if (!new_val->is_register()) { + LIR_Opr new_val_reg = new_pointer_register(); + if (new_val->is_constant()) { + __ move(new_val, new_val_reg); + } else { + __ leal(new_val, new_val_reg); + } + new_val = new_val_reg; + } + assert(new_val->is_register(), "must be a register at this point"); + + if (addr->is_address()) { + LIR_Address* address = addr->as_address_ptr(); + LIR_Opr ptr = new_pointer_register(); + if (!address->index()->is_valid() && address->disp() == 0) { + __ move(address->base(), ptr); + } else { + assert(address->disp() != max_jint, "lea doesn't support patched addresses!"); + __ leal(addr, ptr); + } + addr = ptr; + } + assert(addr->is_register(), "must be a register at this point"); + + LIR_Opr xor_res = new_pointer_register(); + LIR_Opr xor_shift_res = new_pointer_register(); + + if (TwoOperandLIRForm ) { + __ move(addr, xor_res); + __ logical_xor(xor_res, new_val, xor_res); + __ move(xor_res, xor_shift_res); + __ unsigned_shift_right(xor_shift_res, + LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes), + xor_shift_res, + LIR_OprDesc::illegalOpr()); + } else { + __ logical_xor(addr, new_val, xor_res); + __ unsigned_shift_right(xor_res, + LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes), + xor_shift_res, + LIR_OprDesc::illegalOpr()); + } + + if (!new_val->is_register()) { + LIR_Opr new_val_reg = new_pointer_register(); + __ leal(new_val, new_val_reg); + new_val = new_val_reg; + } + assert(new_val->is_register(), "must be a register at this point"); + + __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD)); + + CodeStub* slow = new G1PostBarrierStub(addr, new_val); + __ branch(lir_cond_notEqual, T_INT, slow); + __ branch_destination(slow->continuation()); +} + +#endif // SERIALGC +//////////////////////////////////////////////////////////////////////// + void LIRGenerator::CardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) { - BarrierSet* bs = Universe::heap()->barrier_set(); - assert(sizeof(*((CardTableModRefBS*)bs)->byte_map_base) == sizeof(jbyte), "adjust this code"); - LIR_Const* card_table_base = new LIR_Const(((CardTableModRefBS*)bs)->byte_map_base); + assert(sizeof(*((CardTableModRefBS*)_bs)->byte_map_base) == sizeof(jbyte), "adjust this code"); + LIR_Const* card_table_base = new LIR_Const(((CardTableModRefBS*)_bs)->byte_map_base); if (addr->is_address()) { LIR_Address* address = addr->as_address_ptr(); LIR_Opr ptr = new_register(T_OBJECT); @@ -1388,6 +1517,13 @@ __ membar_release(); } + if (is_oop) { + // Do the pre-write barrier, if any. + pre_barrier(LIR_OprFact::address(address), + needs_patching, + (info ? 
new CodeEmitInfo(info) : NULL)); + } + if (is_volatile) { assert(!needs_patching && x->is_loaded(), "how do we know it's volatile if it's not loaded"); @@ -1398,7 +1534,12 @@ } if (is_oop) { +#ifdef PRECISE_CARDMARK + // Precise cardmarks don't work + post_barrier(LIR_OprFact::address(address), value.result()); +#else post_barrier(object.result(), value.result()); +#endif // PRECISE_CARDMARK } if (is_volatile && os::is_MP()) { diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/c1/c1_LIRGenerator.hpp --- a/src/share/vm/c1/c1_LIRGenerator.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/c1/c1_LIRGenerator.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -145,6 +145,7 @@ // only the classes below belong in the same file class LIRGenerator: public InstructionVisitor, public BlockClosure { + private: Compilation* _compilation; ciMethod* _method; // method that we are compiling @@ -154,6 +155,7 @@ Values _instruction_for_operand; BitMap2D _vreg_flags; // flags which can be set on a per-vreg basis LIR_List* _lir; + BarrierSet* _bs; LIRGenerator* gen() { return this; @@ -174,8 +176,6 @@ LIR_OprList _reg_for_constants; Values _unpinned_constants; - LIR_Const* _card_table_base; - friend class PhiResolver; // unified bailout support @@ -196,8 +196,6 @@ LIR_Opr load_constant(Constant* x); LIR_Opr load_constant(LIR_Const* constant); - LIR_Const* card_table_base() const { return _card_table_base; } - void set_result(Value x, LIR_Opr opr) { assert(opr->is_valid(), "must set to valid value"); assert(x->operand()->is_illegal(), "operand should never change"); @@ -253,12 +251,17 @@ // generic interface + void pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info); void post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val); // specific implementations + // pre barriers + + void G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info); // post barriers + void G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val); void CardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/c1/c1_Runtime1.cpp --- a/src/share/vm/c1/c1_Runtime1.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/c1/c1_Runtime1.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -168,6 +168,8 @@ switch (id) { // These stubs don't need to have an oopmap case dtrace_object_alloc_id: + case g1_pre_barrier_slow_id: + case g1_post_barrier_slow_id: case slow_subtype_check_id: case fpu2long_stub_id: case unwind_exception_id: diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/c1/c1_Runtime1.hpp --- a/src/share/vm/c1/c1_Runtime1.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/c1/c1_Runtime1.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -56,6 +56,8 @@ stub(access_field_patching) \ stub(load_klass_patching) \ stub(jvmti_exception_throw) \ + stub(g1_pre_barrier_slow) \ + stub(g1_post_barrier_slow) \ stub(fpu2long_stub) \ stub(counter_overflow) \ last_entry(number_of_ids) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/c1/c1_globals.hpp --- a/src/share/vm/c1/c1_globals.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/c1/c1_globals.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -213,9 +213,6 @@ develop(bool, UseFastLocking, true, \ "Use fast inlined locking code") \ \ - product(bool, FastTLABRefill, true, \ - "Use fast TLAB refill code") \ - \ develop(bool, UseSlowPath, false, \ "For debugging: test slow cases by always using them") \ \ diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/compiler/methodLiveness.cpp --- a/src/share/vm/compiler/methodLiveness.cpp Wed 
Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/compiler/methodLiveness.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -76,8 +76,9 @@ BitCounter() : _count(0) {} // Callback when bit in map is set - virtual void do_bit(size_t offset) { + virtual bool do_bit(size_t offset) { _count++; + return true; } int count() { @@ -467,7 +468,7 @@ bci = 0; } - MethodLivenessResult answer(NULL,0); + MethodLivenessResult answer((uintptr_t*)NULL,0); if (_block_count > 0) { if (TimeLivenessAnalysis) _time_total.start(); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/compiler/methodLiveness.hpp --- a/src/share/vm/compiler/methodLiveness.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/compiler/methodLiveness.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -29,7 +29,7 @@ bool _is_valid; public: - MethodLivenessResult(uintptr_t* map, idx_t size_in_bits) + MethodLivenessResult(BitMap::bm_word_t* map, idx_t size_in_bits) : BitMap(map, size_in_bits) , _is_valid(false) {} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -790,7 +790,7 @@ } -HeapWord* CompactibleFreeListSpace::block_start(const void* p) const { +HeapWord* CompactibleFreeListSpace::block_start_const(const void* p) const { NOT_PRODUCT(verify_objects_initialized()); return _bt.block_start(p); } @@ -2285,9 +2285,9 @@ } void CompactibleFreeListSpace::verifyIndexedFreeList(size_t size) const { - guarantee(size % 2 == 0, "Odd slots should be empty"); - for (FreeChunk* fc = _indexedFreeList[size].head(); fc != NULL; - fc = fc->next()) { + FreeChunk* fc = _indexedFreeList[size].head(); + guarantee((size % 2 == 0) || fc == NULL, "Odd slots should be empty"); + for (; fc != NULL; fc = fc->next()) { guarantee(fc->size() == size, "Size inconsistency"); guarantee(fc->isFree(), "!free?"); guarantee(fc->next() == NULL || fc->next()->prev() == fc, "Broken list"); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -502,7 +502,7 @@ void blk_iterate(BlkClosure* cl); void blk_iterate_careful(BlkClosureCareful* cl); - HeapWord* block_start(const void* p) const; + HeapWord* block_start_const(const void* p) const; HeapWord* block_start_careful(const void* p) const; size_t block_size(const HeapWord* p) const; size_t block_size_no_stall(HeapWord* p, const CMSCollector* c) const; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -2751,13 +2751,14 @@ public: VerifyMarkedClosure(CMSBitMap* bm): _marks(bm), _failed(false) {} - void do_bit(size_t offset) { + bool do_bit(size_t offset) { HeapWord* addr = _marks->offsetToHeapWord(offset); if (!_marks->isMarked(addr)) { oop(addr)->print(); gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr); _failed = true; } + 
return true; } bool failed() { return _failed; } @@ -4645,8 +4646,11 @@ startTimer(); sample_eden(); // Get and clear dirty region from card table - dirtyRegion = _ct->ct_bs()->dirty_card_range_after_preclean( - MemRegion(nextAddr, endAddr)); + dirtyRegion = _ct->ct_bs()->dirty_card_range_after_reset( + MemRegion(nextAddr, endAddr), + true, + CardTableModRefBS::precleaned_card_val()); + assert(dirtyRegion.start() >= nextAddr, "returned region inconsistent?"); } @@ -5414,8 +5418,8 @@ &mrias_cl); { TraceTime t("grey object rescan", PrintGCDetails, false, gclog_or_tty); - // Iterate over the dirty cards, marking them precleaned, and - // setting the corresponding bits in the mod union table. + // Iterate over the dirty cards, setting the corresponding bits in the + // mod union table. { ModUnionClosure modUnionClosure(&_modUnionTable); _ct->ct_bs()->dirty_card_iterate( @@ -6187,7 +6191,7 @@ // bit vector itself. That is done by a separate call CMSBitMap::allocate() // further below. CMSBitMap::CMSBitMap(int shifter, int mutex_rank, const char* mutex_name): - _bm(NULL,0), + _bm(), _shifter(shifter), _lock(mutex_rank >= 0 ? new Mutex(mutex_rank, mutex_name, true) : NULL) { @@ -6212,7 +6216,7 @@ } assert(_virtual_space.committed_size() == brs.size(), "didn't reserve backing store for all of CMS bit map?"); - _bm.set_map((uintptr_t*)_virtual_space.low()); + _bm.set_map((BitMap::bm_word_t*)_virtual_space.low()); assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= _bmWordSize, "inconsistency in bit map sizing"); _bm.set_size(_bmWordSize >> _shifter); @@ -6853,10 +6857,10 @@ // Should revisit to see if this should be restructured for // greater efficiency. -void MarkFromRootsClosure::do_bit(size_t offset) { +bool MarkFromRootsClosure::do_bit(size_t offset) { if (_skipBits > 0) { _skipBits--; - return; + return true; } // convert offset into a HeapWord* HeapWord* addr = _bitMap->startWord() + offset; @@ -6896,10 +6900,11 @@ } // ...else the setting of klass will dirty the card anyway. } DEBUG_ONLY(}) - return; + return true; } } scanOopsInOop(addr); + return true; } // We take a break if we've been at this for a while, @@ -7033,10 +7038,10 @@ // Should revisit to see if this should be restructured for // greater efficiency. -void Par_MarkFromRootsClosure::do_bit(size_t offset) { +bool Par_MarkFromRootsClosure::do_bit(size_t offset) { if (_skip_bits > 0) { _skip_bits--; - return; + return true; } // convert offset into a HeapWord* HeapWord* addr = _bit_map->startWord() + offset; @@ -7051,10 +7056,11 @@ if (p->klass() == NULL || !p->is_parsable()) { // in the case of Clean-on-Enter optimization, redirty card // and avoid clearing card by increasing the threshold. - return; + return true; } } scan_oops_in_oop(addr); + return true; } void Par_MarkFromRootsClosure::scan_oops_in_oop(HeapWord* ptr) { @@ -7177,7 +7183,7 @@ // Should revisit to see if this should be restructured for // greater efficiency. 
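A recurring change in this patch is that BitMapClosure::do_bit now returns bool instead of void: the closure tells the iterator whether to keep going, so a bit-map walk can stop at the first interesting bit rather than visiting all of them (the CMS closures converted here all return true to keep scanning). A minimal sketch of that protocol with simplified types, not the HotSpot BitMap API:

#include <cstddef>
#include <vector>

// Simplified stand-ins for BitMapClosure and BitMap::iterate.
struct BitClosure {
  virtual bool do_bit(size_t offset) = 0;   // return false to stop the iteration
  virtual ~BitClosure() {}
};

// Applies the closure to every set bit; returns false if the closure aborted.
bool iterate(const std::vector<bool>& bits, BitClosure* cl) {
  for (size_t i = 0; i < bits.size(); ++i) {
    if (bits[i] && !cl->do_bit(i)) return false;
  }
  return true;
}

// Example: record the first set bit at or above a threshold, then stop.
struct FindFrom : BitClosure {
  size_t threshold;
  size_t found;
  explicit FindFrom(size_t t) : threshold(t), found(size_t(-1)) {}
  bool do_bit(size_t offset) {
    if (offset < threshold) return true;    // keep scanning
    found = offset;
    return false;                           // found one: abort the walk
  }
};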
-void MarkFromRootsVerifyClosure::do_bit(size_t offset) { +bool MarkFromRootsVerifyClosure::do_bit(size_t offset) { // convert offset into a HeapWord* HeapWord* addr = _verification_bm->startWord() + offset; assert(_verification_bm->endWord() && addr < _verification_bm->endWord(), @@ -7205,6 +7211,7 @@ new_oop->oop_iterate(&_pam_verify_closure); } assert(_mark_stack->isEmpty(), "tautology, emphasizing post-condition"); + return true; } PushAndMarkVerifyClosure::PushAndMarkVerifyClosure( @@ -7448,8 +7455,12 @@ // Grey object rescan during pre-cleaning and second checkpoint phases -- // the non-parallel version (the parallel version appears further below.) void PushAndMarkClosure::do_oop(oop obj) { - // If _concurrent_precleaning, ignore mark word verification - assert(obj->is_oop_or_null(_concurrent_precleaning), + // Ignore mark word verification. If during concurrent precleaning, + // the object monitor may be locked. If during the checkpoint + // phases, the object may already have been reached by a different + // path and may be at the end of the global overflow list (so + // the mark word may be NULL). + assert(obj->is_oop_or_null(true /* ignore mark word */), "expected an oop or NULL"); HeapWord* addr = (HeapWord*)obj; // Check if oop points into the CMS generation diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -1325,7 +1325,7 @@ CMSMarkStack* markStack, CMSMarkStack* revisitStack, bool should_yield, bool verifying = false); - void do_bit(size_t offset); + bool do_bit(size_t offset); void reset(HeapWord* addr); inline void do_yield_check(); @@ -1361,7 +1361,7 @@ CMSMarkStack* overflow_stack, CMSMarkStack* revisit_stack, bool should_yield); - void do_bit(size_t offset); + bool do_bit(size_t offset); inline void do_yield_check(); private: @@ -1409,7 +1409,7 @@ CMSBitMap* verification_bm, CMSBitMap* cms_bm, CMSMarkStack* mark_stack); - void do_bit(size_t offset); + bool do_bit(size_t offset); void reset(HeapWord* addr); }; @@ -1418,8 +1418,9 @@ // "empty" (i.e. the bit vector doesn't have any 1-bits). class FalseBitMapClosure: public BitMapClosure { public: - void do_bit(size_t offset) { + bool do_bit(size_t offset) { guarantee(false, "Should not have a 1 bit"); + return true; } }; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,195 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// A BufferingOops closure tries to separate out the cost of finding roots +// from the cost of applying closures to them. It maintains an array of +// ref-containing locations. Until the array is full, applying the closure +// to an oop* merely records that location in the array. Since this +// closure app cost is small, an elapsed timer can approximately attribute +// all of this cost to the cost of finding the roots. When the array fills +// up, the wrapped closure is applied to all elements, keeping track of +// this elapsed time of this process, and leaving the array empty. +// The caller must be sure to call "done" to process any unprocessed +// buffered entriess. + +class Generation; +class HeapRegion; + +class BufferingOopClosure: public OopClosure { +protected: + enum PrivateConstants { + BufferLength = 1024 + }; + + oop *_buffer[BufferLength]; + oop **_buffer_top; + oop **_buffer_curr; + + OopClosure *_oc; + double _closure_app_seconds; + + void process_buffer () { + + double start = os::elapsedTime(); + for (oop **curr = _buffer; curr < _buffer_curr; ++curr) { + _oc->do_oop(*curr); + } + _buffer_curr = _buffer; + _closure_app_seconds += (os::elapsedTime() - start); + } + +public: + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop *p) { + if (_buffer_curr == _buffer_top) { + process_buffer(); + } + + *_buffer_curr = p; + ++_buffer_curr; + } + void done () { + if (_buffer_curr > _buffer) { + process_buffer(); + } + } + double closure_app_seconds () { + return _closure_app_seconds; + } + BufferingOopClosure (OopClosure *oc) : + _oc(oc), + _buffer_curr(_buffer), _buffer_top(_buffer + BufferLength), + _closure_app_seconds(0.0) { } +}; + +class BufferingOopsInGenClosure: public OopsInGenClosure { + BufferingOopClosure _boc; + OopsInGenClosure* _oc; +public: + BufferingOopsInGenClosure(OopsInGenClosure *oc) : + _boc(oc), _oc(oc) {} + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + virtual void do_oop(oop* p) { + assert(generation()->is_in_reserved(p), "Must be in!"); + _boc.do_oop(p); + } + + void done() { + _boc.done(); + } + + double closure_app_seconds () { + return _boc.closure_app_seconds(); + } + + void set_generation(Generation* gen) { + OopsInGenClosure::set_generation(gen); + _oc->set_generation(gen); + } + + void reset_generation() { + // Make sure we finish the current work with the current generation. 
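As the header comment above explains, the buffering closures separate the cost of finding roots from the cost of applying the real closure: do_oop only records the location, and the actual work (and its timing) happens when the buffer is flushed, so an elapsed timer around root scanning attributes almost nothing to closure application. The same idea in a self-contained sketch, with std::function and a vector standing in for the OopClosure machinery; as in the real class, done() must be called to flush the tail:

#include <chrono>
#include <cstddef>
#include <functional>
#include <vector>

class BufferingClosure {
  static const size_t kBufferLength = 1024;
  std::vector<void**> _buffer;
  std::function<void(void**)> _work;       // the wrapped closure
  double _closure_app_seconds;

  void process_buffer() {
    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
    for (size_t i = 0; i < _buffer.size(); ++i) _work(_buffer[i]);
    _buffer.clear();
    _closure_app_seconds +=
        std::chrono::duration<double>(std::chrono::steady_clock::now() - start).count();
  }

 public:
  explicit BufferingClosure(std::function<void(void**)> work)
      : _work(work), _closure_app_seconds(0.0) {
    _buffer.reserve(kBufferLength);
  }
  void do_oop(void** p) {
    _buffer.push_back(p);                        // cheap: just record the location
    if (_buffer.size() == kBufferLength) process_buffer();
  }
  void done() { if (!_buffer.empty()) process_buffer(); }   // flush the tail
  double closure_app_seconds() const { return _closure_app_seconds; }
};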
+ _boc.done(); + OopsInGenClosure::reset_generation(); + _oc->reset_generation(); + } + +}; + + +class BufferingOopsInHeapRegionClosure: public OopsInHeapRegionClosure { +private: + enum PrivateConstants { + BufferLength = 1024 + }; + + oop *_buffer[BufferLength]; + oop **_buffer_top; + oop **_buffer_curr; + + HeapRegion *_hr_buffer[BufferLength]; + HeapRegion **_hr_curr; + + OopsInHeapRegionClosure *_oc; + double _closure_app_seconds; + + void process_buffer () { + + assert((_hr_curr - _hr_buffer) == (_buffer_curr - _buffer), + "the two lengths should be the same"); + + double start = os::elapsedTime(); + HeapRegion **hr_curr = _hr_buffer; + HeapRegion *hr_prev = NULL; + for (oop **curr = _buffer; curr < _buffer_curr; ++curr) { + HeapRegion *region = *hr_curr; + if (region != hr_prev) { + _oc->set_region(region); + hr_prev = region; + } + _oc->do_oop(*curr); + ++hr_curr; + } + _buffer_curr = _buffer; + _hr_curr = _hr_buffer; + _closure_app_seconds += (os::elapsedTime() - start); + } + +public: + virtual void do_oop(narrowOop *p) { + guarantee(false, "NYI"); + } + + virtual void do_oop(oop *p) { + if (_buffer_curr == _buffer_top) { + assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr"); + process_buffer(); + } + + *_buffer_curr = p; + ++_buffer_curr; + *_hr_curr = _from; + ++_hr_curr; + } + void done () { + if (_buffer_curr > _buffer) { + assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr"); + process_buffer(); + } + } + double closure_app_seconds () { + return _closure_app_seconds; + } + BufferingOopsInHeapRegionClosure (OopsInHeapRegionClosure *oc) : + _oc(oc), + _buffer_curr(_buffer), _buffer_top(_buffer + BufferLength), + _hr_curr(_hr_buffer), + _closure_app_seconds(0.0) { } +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/collectionSetChooser.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,409 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_collectionSetChooser.cpp.incl" + +CSetChooserCache::CSetChooserCache() { + for (int i = 0; i < CacheLength; ++i) + _cache[i] = NULL; + clear(); +} + +void CSetChooserCache::clear() { + _occupancy = 0; + _first = 0; + for (int i = 0; i < CacheLength; ++i) { + HeapRegion *hr = _cache[i]; + if (hr != NULL) + hr->set_sort_index(-1); + _cache[i] = NULL; + } +} + +#ifndef PRODUCT +bool CSetChooserCache::verify() { + int index = _first; + HeapRegion *prev = NULL; + for (int i = 0; i < _occupancy; ++i) { + guarantee(_cache[index] != NULL, "cache entry should not be empty"); + HeapRegion *hr = _cache[index]; + guarantee(!hr->is_young(), "should not be young!"); + if (prev != NULL) { + guarantee(prev->gc_efficiency() >= hr->gc_efficiency(), + "cache should be correctly ordered"); + } + guarantee(hr->sort_index() == get_sort_index(index), + "sort index should be correct"); + index = trim_index(index + 1); + prev = hr; + } + + for (int i = 0; i < (CacheLength - _occupancy); ++i) { + guarantee(_cache[index] == NULL, "cache entry should be empty"); + index = trim_index(index + 1); + } + + guarantee(index == _first, "we should have reached where we started from"); + return true; +} +#endif // PRODUCT + +void CSetChooserCache::insert(HeapRegion *hr) { + assert(!is_full(), "cache should not be empty"); + hr->calc_gc_efficiency(); + + int empty_index; + if (_occupancy == 0) { + empty_index = _first; + } else { + empty_index = trim_index(_first + _occupancy); + assert(_cache[empty_index] == NULL, "last slot should be empty"); + int last_index = trim_index(empty_index - 1); + HeapRegion *last = _cache[last_index]; + assert(last != NULL,"as the cache is not empty, last should not be empty"); + while (empty_index != _first && + last->gc_efficiency() < hr->gc_efficiency()) { + _cache[empty_index] = last; + last->set_sort_index(get_sort_index(empty_index)); + empty_index = last_index; + last_index = trim_index(last_index - 1); + last = _cache[last_index]; + } + } + _cache[empty_index] = hr; + hr->set_sort_index(get_sort_index(empty_index)); + + ++_occupancy; + assert(verify(), "cache should be consistent"); +} + +HeapRegion *CSetChooserCache::remove_first() { + if (_occupancy > 0) { + assert(_cache[_first] != NULL, "cache should have at least one region"); + HeapRegion *ret = _cache[_first]; + _cache[_first] = NULL; + ret->set_sort_index(-1); + --_occupancy; + _first = trim_index(_first + 1); + assert(verify(), "cache should be consistent"); + return ret; + } else { + return NULL; + } +} + +// this is a bit expensive... but we expect that it should not be called +// to often. 
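The slot arithmetic used throughout this cache is compact enough to misread; the following is a minimal standalone sketch of it (illustrative only, not part of the changeset), assuming the same CacheLength of 16 and the same -index-2 sort-index encoding that collectionSetChooser.hpp declares later in this patch.

#include <cassert>

// Slots live in a circular buffer of CacheLength entries; adding CacheLength
// before taking the modulus keeps "index - 1" style inputs non-negative.
const int CacheLength = 16;
inline int trim_index(int index)     { return (index + CacheLength) % CacheLength; }

// A cached region advertises its slot as sort index -slot-2, so every cached
// region has a sort index strictly below -1, leaving -1 free to mean "not tracked".
inline int get_sort_index(int slot)  { return -slot - 2; }
inline int get_index(int sort_index) { return -sort_index - 2; }

int main() {
  assert(trim_index(-1) == CacheLength - 1);          // stepping left of slot 0 wraps
  assert(trim_index(CacheLength) == 0);               // stepping past the last slot wraps
  for (int slot = 0; slot < CacheLength; ++slot) {
    assert(get_sort_index(slot) < -1);                // distinguishable from "not tracked"
    assert(get_index(get_sort_index(slot)) == slot);  // encoding round-trips
  }
  return 0;
}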
+void CSetChooserCache::remove(HeapRegion *hr) { + assert(_occupancy > 0, "cache should not be empty"); + assert(hr->sort_index() < -1, "should already be in the cache"); + int index = get_index(hr->sort_index()); + assert(_cache[index] == hr, "index should be correct"); + int next_index = trim_index(index + 1); + int last_index = trim_index(_first + _occupancy - 1); + while (index != last_index) { + assert(_cache[next_index] != NULL, "should not be null"); + _cache[index] = _cache[next_index]; + _cache[index]->set_sort_index(get_sort_index(index)); + + index = next_index; + next_index = trim_index(next_index+1); + } + assert(index == last_index, "should have reached the last one"); + _cache[index] = NULL; + hr->set_sort_index(-1); + --_occupancy; + assert(verify(), "cache should be consistent"); +} + +static inline int orderRegions(HeapRegion* hr1, HeapRegion* hr2) { + if (hr1 == NULL) { + if (hr2 == NULL) return 0; + else return 1; + } else if (hr2 == NULL) { + return -1; + } + if (hr2->gc_efficiency() < hr1->gc_efficiency()) return -1; + else if (hr1->gc_efficiency() < hr2->gc_efficiency()) return 1; + else return 0; +} + +static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) { + return orderRegions(*hr1p, *hr2p); +} + +CollectionSetChooser::CollectionSetChooser() : + // The line below is the worst bit of C++ hackery I've ever written + // (Detlefs, 11/23). You should think of it as equivalent to + // "_regions(100, true)": initialize the growable array and inform it + // that it should allocate its elem array(s) on the C heap. The first + // argument, however, is actually a comma expression (new-expr, 100). + // The purpose of the new_expr is to inform the growable array that it + // is *already* allocated on the C heap: it uses the placement syntax to + // keep it from actually doing any allocation. 
+ _markedRegions((ResourceObj::operator new (sizeof(GrowableArray), + (void*)&_markedRegions, + ResourceObj::C_HEAP), + 100), + true), + _curMarkedIndex(0), + _numMarkedRegions(0), + _unmarked_age_1_returned_as_new(false), + _first_par_unreserved_idx(0) +{} + + + +#ifndef PRODUCT +bool CollectionSetChooser::verify() { + int index = 0; + guarantee(_curMarkedIndex <= _numMarkedRegions, + "_curMarkedIndex should be within bounds"); + while (index < _curMarkedIndex) { + guarantee(_markedRegions.at(index++) == NULL, + "all entries before _curMarkedIndex should be NULL"); + } + HeapRegion *prev = NULL; + while (index < _numMarkedRegions) { + HeapRegion *curr = _markedRegions.at(index++); + if (curr != NULL) { + int si = curr->sort_index(); + guarantee(!curr->is_young(), "should not be young!"); + guarantee(si > -1 && si == (index-1), "sort index invariant"); + if (prev != NULL) { + guarantee(orderRegions(prev, curr) != 1, "regions should be sorted"); + } + prev = curr; + } + } + return _cache.verify(); +} +#endif + +bool +CollectionSetChooser::addRegionToCache() { + assert(!_cache.is_full(), "cache should not be full"); + + HeapRegion *hr = NULL; + while (hr == NULL && _curMarkedIndex < _numMarkedRegions) { + hr = _markedRegions.at(_curMarkedIndex++); + } + if (hr == NULL) + return false; + assert(!hr->is_young(), "should not be young!"); + assert(hr->sort_index() == _curMarkedIndex-1, "sort_index invariant"); + _markedRegions.at_put(hr->sort_index(), NULL); + _cache.insert(hr); + assert(!_cache.is_empty(), "cache should not be empty"); + assert(verify(), "cache should be consistent"); + return false; +} + +void +CollectionSetChooser::fillCache() { + while (!_cache.is_full() && addRegionToCache()) { + } +} + +void +CollectionSetChooser::sortMarkedHeapRegions() { + guarantee(_cache.is_empty(), "cache should be empty"); + // First trim any unused portion of the top in the parallel case. 
+ if (_first_par_unreserved_idx > 0) { + if (G1PrintParCleanupStats) { + gclog_or_tty->print(" Truncating _markedRegions from %d to %d.\n", + _markedRegions.length(), _first_par_unreserved_idx); + } + assert(_first_par_unreserved_idx <= _markedRegions.length(), + "Or we didn't reserved enough length"); + _markedRegions.trunc_to(_first_par_unreserved_idx); + } + _markedRegions.sort(orderRegions); + assert(_numMarkedRegions <= _markedRegions.length(), "Requirement"); + assert(_numMarkedRegions == 0 + || _markedRegions.at(_numMarkedRegions-1) != NULL, + "Testing _numMarkedRegions"); + assert(_numMarkedRegions == _markedRegions.length() + || _markedRegions.at(_numMarkedRegions) == NULL, + "Testing _numMarkedRegions"); + if (G1PrintParCleanupStats) { + gclog_or_tty->print_cr(" Sorted %d marked regions.", _numMarkedRegions); + } + for (int i = 0; i < _numMarkedRegions; i++) { + assert(_markedRegions.at(i) != NULL, "Should be true by sorting!"); + _markedRegions.at(i)->set_sort_index(i); + if (G1PrintRegionLivenessInfo > 0) { + if (i == 0) gclog_or_tty->print_cr("Sorted marked regions:"); + if (i < G1PrintRegionLivenessInfo || + (_numMarkedRegions-i) < G1PrintRegionLivenessInfo) { + HeapRegion* hr = _markedRegions.at(i); + size_t u = hr->used(); + gclog_or_tty->print_cr(" Region %d: %d used, %d max live, %5.2f%%.", + i, u, hr->max_live_bytes(), + 100.0*(float)hr->max_live_bytes()/(float)u); + } + } + } + if (G1PolicyVerbose > 1) + printSortedHeapRegions(); + assert(verify(), "should now be sorted"); +} + +void +printHeapRegion(HeapRegion *hr) { + if (hr->isHumongous()) + gclog_or_tty->print("H: "); + if (hr->in_collection_set()) + gclog_or_tty->print("CS: "); + if (hr->popular()) + gclog_or_tty->print("pop: "); + gclog_or_tty->print_cr("Region " PTR_FORMAT " (%s%s) " + "[" PTR_FORMAT ", " PTR_FORMAT"] " + "Used: " SIZE_FORMAT "K, garbage: " SIZE_FORMAT "K.", + hr, hr->is_young() ? "Y " : " ", + hr->is_marked()? 
"M1" : "M0", + hr->bottom(), hr->end(), + hr->used()/K, hr->garbage_bytes()/K); +} + +void +CollectionSetChooser::addMarkedHeapRegion(HeapRegion* hr) { + assert(!hr->isHumongous(), + "Humongous regions shouldn't be added to the collection set"); + assert(!hr->is_young(), "should not be young!"); + _markedRegions.append(hr); + _numMarkedRegions++; + hr->calc_gc_efficiency(); +} + +void +CollectionSetChooser:: +prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize) { + _first_par_unreserved_idx = 0; + size_t max_waste = ParallelGCThreads * chunkSize; + // it should be aligned with respect to chunkSize + size_t aligned_n_regions = + (n_regions + (chunkSize - 1)) / chunkSize * chunkSize; + assert( aligned_n_regions % chunkSize == 0, "should be aligned" ); + _markedRegions.at_put_grow((int)(aligned_n_regions + max_waste - 1), NULL); +} + +jint +CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) { + jint res = Atomic::add(n_regions, &_first_par_unreserved_idx); + assert(_markedRegions.length() > res + n_regions - 1, + "Should already have been expanded"); + return res - n_regions; +} + +void +CollectionSetChooser::setMarkedHeapRegion(jint index, HeapRegion* hr) { + assert(_markedRegions.at(index) == NULL, "precondition"); + assert(!hr->is_young(), "should not be young!"); + _markedRegions.at_put(index, hr); + hr->calc_gc_efficiency(); +} + +void +CollectionSetChooser::incNumMarkedHeapRegions(jint inc_by) { + (void)Atomic::add(inc_by, &_numMarkedRegions); +} + +void +CollectionSetChooser::clearMarkedHeapRegions(){ + for (int i = 0; i < _markedRegions.length(); i++) { + HeapRegion* r = _markedRegions.at(i); + if (r != NULL) r->set_sort_index(-1); + } + _markedRegions.clear(); + _curMarkedIndex = 0; + _numMarkedRegions = 0; + _cache.clear(); +}; + +void +CollectionSetChooser::updateAfterFullCollection() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + clearMarkedHeapRegions(); +} + +void +CollectionSetChooser::printSortedHeapRegions() { + gclog_or_tty->print_cr("Printing %d Heap Regions sorted by amount of known garbage", + _numMarkedRegions); + for (int i = 0; i < _markedRegions.length(); i++) { + printHeapRegion(_markedRegions.at(i)); + } + gclog_or_tty->print_cr("Done sorted heap region print"); +} + +void CollectionSetChooser::removeRegion(HeapRegion *hr) { + int si = hr->sort_index(); + assert(si == -1 || hr->is_marked(), "Sort index not valid."); + if (si > -1) { + assert(_markedRegions.at(si) == hr, "Sort index not valid." 
); + _markedRegions.at_put(si, NULL); + } else if (si < -1) { + assert(_cache.region_in_cache(hr), "should be in the cache"); + _cache.remove(hr); + assert(hr->sort_index() == -1, "sort index invariant"); + } + hr->set_sort_index(-1); +} + +// if time_remaining < 0.0, then this method should try to return +// a region, whether it fits within the remaining time or not +HeapRegion* +CollectionSetChooser::getNextMarkedRegion(double time_remaining, + double avg_prediction) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + G1CollectorPolicy* g1p = g1h->g1_policy(); + fillCache(); + if (_cache.is_empty()) { + assert(_curMarkedIndex == _numMarkedRegions, + "if cache is empty, list should also be empty"); + return NULL; + } + + HeapRegion *hr = _cache.get_first(); + assert(hr != NULL, "if cache not empty, first entry should be non-null"); + double predicted_time = g1h->predict_region_elapsed_time_ms(hr, false); + + if (g1p->adaptive_young_list_length()) { + if (time_remaining - predicted_time < 0.0) { + g1h->check_if_region_is_too_expensive(predicted_time); + return NULL; + } + } else { + if (predicted_time > 2.0 * avg_prediction) { + return NULL; + } + } + + HeapRegion *hr2 = _cache.remove_first(); + assert(hr == hr2, "cache contents should not have changed"); + + return hr; +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/collectionSetChooser.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,138 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// We need to sort heap regions by collection desirability. 
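Concretely, "desirability" here is HeapRegion::gc_efficiency() (roughly, reclaimable garbage per unit of predicted collection time): regions with higher efficiency sort first and NULL slots sink to the end. Below is a compact restatement of the ordering contract of orderRegions() from the .cpp above; it is illustrative only, with a placeholder struct standing in for HeapRegion.

#include <cstddef>

struct RegionSketch { double gc_efficiency; };

// Same contract as orderRegions(): negative means r1 sorts before r2.
static int order_regions_sketch(const RegionSketch* r1, const RegionSketch* r2) {
  if (r1 == NULL) return (r2 == NULL) ? 0 : 1;           // NULL entries sort last
  if (r2 == NULL) return -1;
  if (r2->gc_efficiency < r1->gc_efficiency) return -1;  // more efficient regions first
  if (r1->gc_efficiency < r2->gc_efficiency) return 1;
  return 0;
}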
+ +class CSetChooserCache { +private: + enum { + CacheLength = 16 + } PrivateConstants; + + HeapRegion* _cache[CacheLength]; + int _occupancy; // number of region in cache + int _first; // "first" region in the cache + + // adding CacheLength to deal with negative values + inline int trim_index(int index) { + return (index + CacheLength) % CacheLength; + } + + inline int get_sort_index(int index) { + return -index-2; + } + inline int get_index(int sort_index) { + return -sort_index-2; + } + +public: + CSetChooserCache(void); + + inline int occupancy(void) { return _occupancy; } + inline bool is_full() { return _occupancy == CacheLength; } + inline bool is_empty() { return _occupancy == 0; } + + void clear(void); + void insert(HeapRegion *hr); + HeapRegion *remove_first(void); + void remove (HeapRegion *hr); + inline HeapRegion *get_first(void) { + return _cache[_first]; + } + +#ifndef PRODUCT + bool verify (void); + bool region_in_cache(HeapRegion *hr) { + int sort_index = hr->sort_index(); + if (sort_index < -1) { + int index = get_index(sort_index); + guarantee(index < CacheLength, "should be within bounds"); + return _cache[index] == hr; + } else + return 0; + } +#endif // PRODUCT +}; + +class CollectionSetChooser: public CHeapObj { + + GrowableArray _markedRegions; + int _curMarkedIndex; + int _numMarkedRegions; + CSetChooserCache _cache; + + // True iff last collection pause ran of out new "age 0" regions, and + // returned an "age 1" region. + bool _unmarked_age_1_returned_as_new; + + jint _first_par_unreserved_idx; + +public: + + HeapRegion* getNextMarkedRegion(double time_so_far, double avg_prediction); + + CollectionSetChooser(); + + void printSortedHeapRegions(); + + void sortMarkedHeapRegions(); + void fillCache(); + bool addRegionToCache(void); + void addMarkedHeapRegion(HeapRegion *hr); + + // Must be called before calls to getParMarkedHeapRegionChunk. + // "n_regions" is the number of regions, "chunkSize" the chunk size. + void prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize); + // Returns the first index in a contiguous chunk of "n_regions" indexes + // that the calling thread has reserved. These must be set by the + // calling thread using "setMarkedHeapRegion" (to NULL if necessary). + jint getParMarkedHeapRegionChunk(jint n_regions); + // Set the marked array entry at index to hr. Careful to claim the index + // first if in parallel. + void setMarkedHeapRegion(jint index, HeapRegion* hr); + // Atomically increment the number of claimed regions by "inc_by". + void incNumMarkedHeapRegions(jint inc_by); + + void clearMarkedHeapRegions(); + + void updateAfterFullCollection(); + + // Ensure that "hr" is not a member of the marked region array or the cache + void removeRegion(HeapRegion* hr); + + bool unmarked_age_1_returned_as_new() { return _unmarked_age_1_returned_as_new; } + + // Returns true if the used portion of "_markedRegions" is properly + // sorted, otherwise asserts false. +#ifndef PRODUCT + bool verify(void); + bool regionProperlyOrdered(HeapRegion* r) { + int si = r->sort_index(); + return (si == -1) || + (si > -1 && _markedRegions.at(si) == r) || + (si < -1 && _cache.region_in_cache(r)); + } +#endif + +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,355 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_concurrentG1Refine.cpp.incl" + +bool ConcurrentG1Refine::_enabled = false; + +ConcurrentG1Refine::ConcurrentG1Refine() : + _pya(PYA_continue), _last_pya(PYA_continue), + _last_cards_during(), _first_traversal(false), + _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL), + _hot_cache(NULL), + _def_use_cache(false), _use_cache(false), + _n_periods(0), _total_cards(0), _total_travs(0) +{ + if (G1ConcRefine) { + _cg1rThread = new ConcurrentG1RefineThread(this); + assert(cg1rThread() != NULL, "Conc refine should have been created"); + assert(cg1rThread()->cg1r() == this, + "Conc refine thread should refer to this"); + } else { + _cg1rThread = NULL; + } +} + +void ConcurrentG1Refine::init() { + if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + _n_card_counts = + (unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift); + _card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts); + for (size_t i = 0; i < _n_card_counts; i++) _card_counts[i] = 0; + ModRefBarrierSet* bs = g1h->mr_bs(); + guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition"); + CardTableModRefBS* ctbs = (CardTableModRefBS*)bs; + _ct_bot = ctbs->byte_for_const(g1h->reserved_region().start()); + if (G1ConcRSCountTraversals) { + _cur_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256); + _cum_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256); + for (int i = 0; i < 256; i++) { + _cur_card_count_histo[i] = 0; + _cum_card_count_histo[i] = 0; + } + } + } + if (G1ConcRSLogCacheSize > 0) { + _def_use_cache = true; + _use_cache = true; + _hot_cache_size = (1 << G1ConcRSLogCacheSize); + _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size); + _n_hot = 0; + _hot_cache_idx = 0; + } +} + +ConcurrentG1Refine::~ConcurrentG1Refine() { + if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { + assert(_card_counts != NULL, "Logic"); + FREE_C_HEAP_ARRAY(unsigned char, _card_counts); + assert(_cur_card_count_histo != NULL, "Logic"); + FREE_C_HEAP_ARRAY(unsigned, _cur_card_count_histo); + assert(_cum_card_count_histo != NULL, "Logic"); + FREE_C_HEAP_ARRAY(unsigned, _cum_card_count_histo); + } + if (G1ConcRSLogCacheSize > 0) { + assert(_hot_cache != NULL, "Logic"); + FREE_C_HEAP_ARRAY(jbyte*, _hot_cache); + } +} + +bool ConcurrentG1Refine::refine() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + unsigned cards_before = g1h->g1_rem_set()->conc_refine_cards(); + clear_hot_cache(); // 
Any previous values in this are now invalid. + g1h->g1_rem_set()->concurrentRefinementPass(this); + _traversals++; + unsigned cards_after = g1h->g1_rem_set()->conc_refine_cards(); + unsigned cards_during = cards_after-cards_before; + // If this is the first traversal in the current enabling + // and we did some cards, or if the number of cards found is decreasing + // sufficiently quickly, then keep going. Otherwise, sleep a while. + bool res = + (_first_traversal && cards_during > 0) + || + (!_first_traversal && cards_during * 3 < _last_cards_during * 2); + _last_cards_during = cards_during; + _first_traversal = false; + return res; +} + +void ConcurrentG1Refine::enable() { + MutexLocker x(G1ConcRefine_mon); + if (!_enabled) { + _enabled = true; + _first_traversal = true; _last_cards_during = 0; + G1ConcRefine_mon->notify_all(); + } +} + +unsigned ConcurrentG1Refine::disable() { + MutexLocker x(G1ConcRefine_mon); + if (_enabled) { + _enabled = false; + return _traversals; + } else { + return 0; + } +} + +void ConcurrentG1Refine::wait_for_ConcurrentG1Refine_enabled() { + G1ConcRefine_mon->lock(); + while (!_enabled) { + G1ConcRefine_mon->wait(Mutex::_no_safepoint_check_flag); + } + G1ConcRefine_mon->unlock(); + _traversals = 0; +}; + +void ConcurrentG1Refine::set_pya_restart() { + // If we're using the log-based RS barrier, the above will cause + // in-progress traversals of completed log buffers to quit early; we will + // also abandon all other buffers. + if (G1RSBarrierUseQueue) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + dcqs.abandon_logs(); + if (_cg1rThread->do_traversal()) { + _pya = PYA_restart; + } else { + _cg1rThread->set_do_traversal(true); + // Reset the post-yield actions. + _pya = PYA_continue; + _last_pya = PYA_continue; + } + } else { + _pya = PYA_restart; + } +} + +void ConcurrentG1Refine::set_pya_cancel() { + _pya = PYA_cancel; +} + +PostYieldAction ConcurrentG1Refine::get_pya() { + if (_pya != PYA_continue) { + jint val = _pya; + while (true) { + jint val_read = Atomic::cmpxchg(PYA_continue, &_pya, val); + if (val_read == val) { + PostYieldAction res = (PostYieldAction)val; + assert(res != PYA_continue, "Only the refine thread should reset."); + _last_pya = res; + return res; + } else { + val = val_read; + } + } + } + // QQQ WELL WHAT DO WE RETURN HERE??? + // make up something! + return PYA_continue; +} + +PostYieldAction ConcurrentG1Refine::get_last_pya() { + PostYieldAction res = _last_pya; + _last_pya = PYA_continue; + return res; +} + +bool ConcurrentG1Refine::do_traversal() { + return _cg1rThread->do_traversal(); +} + +int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) { + size_t card_num = (card_ptr - _ct_bot); + guarantee(0 <= card_num && card_num < _n_card_counts, "Bounds"); + unsigned char cnt = _card_counts[card_num]; + if (cnt < 255) _card_counts[card_num]++; + return cnt; + _total_travs++; +} + +jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) { + int count = add_card_count(card_ptr); + // Count previously unvisited cards. + if (count == 0) _total_cards++; + // We'll assume a traversal unless we store it in the cache. + if (count < G1ConcRSHotCardLimit) { + _total_travs++; + return card_ptr; + } + // Otherwise, it's hot. + jbyte* res = NULL; + MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag); + if (_n_hot == _hot_cache_size) { + _total_travs++; + res = _hot_cache[_hot_cache_idx]; + _n_hot--; + } + // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx. 
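For orientation at this point in cache_insert(): cards still below G1ConcRSHotCardLimit are handed straight back for immediate refinement, while hotter cards are parked in a small ring buffer and only resurface when a newer hot card evicts them (the real code holds HotCardCache_lock around this section). The following standalone sketch of that contract is illustrative only; the template and names are invented stand-ins for the jbyte* card array.

#include <cstddef>

// Fixed-size hot-card cache with overwrite-the-oldest eviction.
template <typename Card, int N>
class HotCacheSketch {
  Card* _entries[N];
  int   _idx;    // next slot to (over)write
  int   _n_hot;  // live entries, at most N
public:
  HotCacheSketch() : _idx(0), _n_hot(0) {
    for (int i = 0; i < N; ++i) _entries[i] = NULL;
  }

  // Returns the card the caller should refine right now: the input itself
  // while the card is still "cold", the displaced entry when the buffer was
  // full, or NULL when the hot card was simply parked.
  Card* insert(Card* card, bool is_hot) {
    if (!is_hot) return card;        // cold card: refine immediately
    Card* evicted = NULL;
    if (_n_hot == N) {
      evicted = _entries[_idx];      // remember the oldest occupant
      --_n_hot;
    }
    _entries[_idx] = card;           // park the hot card
    _idx = (_idx + 1) % N;
    ++_n_hot;
    return evicted;                  // NULL unless something was displaced
  }
};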
+ _hot_cache[_hot_cache_idx] = card_ptr; + _hot_cache_idx++; + if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0; + _n_hot++; + return res; +} + + +void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) { + assert(!use_cache(), "cache should be disabled"); + int start_ind = _hot_cache_idx-1; + for (int i = 0; i < _n_hot; i++) { + int ind = start_ind - i; + if (ind < 0) ind = ind + _hot_cache_size; + jbyte* entry = _hot_cache[ind]; + if (entry != NULL) { + g1rs->concurrentRefineOneCard(entry, worker_i); + } + } + _n_hot = 0; + _hot_cache_idx = 0; +} + +void ConcurrentG1Refine::clear_and_record_card_counts() { + if (G1ConcRSLogCacheSize == 0 && !G1ConcRSCountTraversals) return; + _n_periods++; + if (G1ConcRSCountTraversals) { + for (size_t i = 0; i < _n_card_counts; i++) { + unsigned char bucket = _card_counts[i]; + _cur_card_count_histo[bucket]++; + _card_counts[i] = 0; + } + gclog_or_tty->print_cr("Card counts:"); + for (int i = 0; i < 256; i++) { + if (_cur_card_count_histo[i] > 0) { + gclog_or_tty->print_cr(" %3d: %9d", i, _cur_card_count_histo[i]); + _cum_card_count_histo[i] += _cur_card_count_histo[i]; + _cur_card_count_histo[i] = 0; + } + } + } else { + assert(G1ConcRSLogCacheSize > 0, "Logic"); + Copy::fill_to_words((HeapWord*)(&_card_counts[0]), + _n_card_counts / HeapWordSize); + } +} + +void +ConcurrentG1Refine:: +print_card_count_histo_range(unsigned* histo, int from, int to, + float& cum_card_pct, + float& cum_travs_pct) { + unsigned cards = 0; + unsigned travs = 0; + guarantee(to <= 256, "Precondition"); + for (int i = from; i < to-1; i++) { + cards += histo[i]; + travs += histo[i] * i; + } + if (to == 256) { + unsigned histo_card_sum = 0; + unsigned histo_trav_sum = 0; + for (int i = 1; i < 255; i++) { + histo_trav_sum += histo[i] * i; + } + cards += histo[255]; + // correct traversals for the last one. + unsigned travs_255 = (unsigned) (_total_travs - histo_trav_sum); + travs += travs_255; + + } else { + cards += histo[to-1]; + travs += histo[to-1] * (to-1); + } + float fperiods = (float)_n_periods; + float f_tot_cards = (float)_total_cards/fperiods; + float f_tot_travs = (float)_total_travs/fperiods; + if (cards > 0) { + float fcards = (float)cards/fperiods; + float ftravs = (float)travs/fperiods; + if (to == 256) { + gclog_or_tty->print(" %4d- %10.2f%10.2f", from, fcards, ftravs); + } else { + gclog_or_tty->print(" %4d-%4d %10.2f%10.2f", from, to-1, fcards, ftravs); + } + float pct_cards = fcards*100.0/f_tot_cards; + cum_card_pct += pct_cards; + float pct_travs = ftravs*100.0/f_tot_travs; + cum_travs_pct += pct_travs; + gclog_or_tty->print_cr("%10.2f%10.2f%10.2f%10.2f", + pct_cards, cum_card_pct, + pct_travs, cum_travs_pct); + } +} + +void ConcurrentG1Refine::print_final_card_counts() { + if (!G1ConcRSCountTraversals) return; + + gclog_or_tty->print_cr("Did %d total traversals of %d distinct cards.", + _total_travs, _total_cards); + float fperiods = (float)_n_periods; + gclog_or_tty->print_cr(" This is an average of %8.2f traversals, %8.2f cards, " + "per collection.", (float)_total_travs/fperiods, + (float)_total_cards/fperiods); + gclog_or_tty->print_cr(" This is an average of %8.2f traversals/distinct " + "dirty card.\n", + _total_cards > 0 ? 
+ (float)_total_travs/(float)_total_cards : 0.0); + + + gclog_or_tty->print_cr("Histogram:\n\n%10s %10s%10s%10s%10s%10s%10s", + "range", "# cards", "# travs", "% cards", "(cum)", + "% travs", "(cum)"); + gclog_or_tty->print_cr("------------------------------------------------------------" + "-------------"); + float cum_cards_pct = 0.0; + float cum_travs_pct = 0.0; + for (int i = 1; i < 10; i++) { + print_card_count_histo_range(_cum_card_count_histo, i, i+1, + cum_cards_pct, cum_travs_pct); + } + for (int i = 10; i < 100; i += 10) { + print_card_count_histo_range(_cum_card_count_histo, i, i+10, + cum_cards_pct, cum_travs_pct); + } + print_card_count_histo_range(_cum_card_count_histo, 100, 150, + cum_cards_pct, cum_travs_pct); + print_card_count_histo_range(_cum_card_count_histo, 150, 200, + cum_cards_pct, cum_travs_pct); + print_card_count_histo_range(_cum_card_count_histo, 150, 255, + cum_cards_pct, cum_travs_pct); + print_card_count_histo_range(_cum_card_count_histo, 255, 256, + cum_cards_pct, cum_travs_pct); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,132 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Forward decl +class ConcurrentG1RefineThread; +class G1RemSet; + +// What to do after a yield: +enum PostYieldAction { + PYA_continue, // Continue the traversal + PYA_restart, // Restart + PYA_cancel // It's been completed by somebody else: cancel. +}; + +class ConcurrentG1Refine { + ConcurrentG1RefineThread* _cg1rThread; + + volatile jint _pya; + PostYieldAction _last_pya; + + static bool _enabled; // Protected by G1ConcRefine_mon. + unsigned _traversals; + + // Number of cards processed during last refinement traversal. + unsigned _first_traversal; + unsigned _last_cards_during; + + // The cache for card refinement. + bool _use_cache; + bool _def_use_cache; + size_t _n_periods; + size_t _total_cards; + size_t _total_travs; + + unsigned char* _card_counts; + unsigned _n_card_counts; + const jbyte* _ct_bot; + unsigned* _cur_card_count_histo; + unsigned* _cum_card_count_histo; + jbyte** _hot_cache; + int _hot_cache_size; + int _n_hot; + int _hot_cache_idx; + + // Returns the count of this card after incrementing it. 
+ int add_card_count(jbyte* card_ptr); + + void print_card_count_histo_range(unsigned* histo, int from, int to, + float& cum_card_pct, + float& cum_travs_pct); + public: + ConcurrentG1Refine(); + ~ConcurrentG1Refine(); + + void init(); // Accomplish some initialization that has to wait. + + // Enabled Conc refinement, waking up thread if necessary. + void enable(); + + // Returns the number of traversals performed since this refiner was enabled. + unsigned disable(); + + // Requires G1ConcRefine_mon to be held. + bool enabled() { return _enabled; } + + // Returns only when G1 concurrent refinement has been enabled. + void wait_for_ConcurrentG1Refine_enabled(); + + // Do one concurrent refinement pass over the card table. Returns "true" + // if heuristics determine that another pass should be done immediately. + bool refine(); + + // Indicate that an in-progress refinement pass should start over. + void set_pya_restart(); + // Indicate that an in-progress refinement pass should quit. + void set_pya_cancel(); + + // Get the appropriate post-yield action. Also sets last_pya. + PostYieldAction get_pya(); + + // The last PYA read by "get_pya". + PostYieldAction get_last_pya(); + + bool do_traversal(); + + ConcurrentG1RefineThread* cg1rThread() { return _cg1rThread; } + + // If this is the first entry for the slot, writes into the cache and + // returns NULL. If it causes an eviction, returns the evicted pointer. + // Otherwise, its a cache hit, and returns NULL. + jbyte* cache_insert(jbyte* card_ptr); + + // Process the cached entries. + void clean_up_cache(int worker_i, G1RemSet* g1rs); + + // Discard entries in the hot cache. + void clear_hot_cache() { + _hot_cache_idx = 0; _n_hot = 0; + } + + bool hot_cache_is_empty() { return _n_hot == 0; } + + bool use_cache() { return _use_cache; } + void set_use_cache(bool b) { + if (b) _use_cache = _def_use_cache; + else _use_cache = false; + } + + void clear_and_record_card_counts(); + void print_final_card_counts(); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,246 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_concurrentG1RefineThread.cpp.incl" + +// ======= Concurrent Mark Thread ======== + +// The CM thread is created when the G1 garbage collector is used + +ConcurrentG1RefineThread:: +ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) : + ConcurrentGCThread(), + _cg1r(cg1r), + _started(false), + _in_progress(false), + _do_traversal(false), + _vtime_accum(0.0), + _co_tracker(G1CRGroup), + _interval_ms(5.0) +{ + create_and_start(); +} + +const long timeout = 200; // ms. + +void ConcurrentG1RefineThread::traversalBasedRefinement() { + _cg1r->wait_for_ConcurrentG1Refine_enabled(); + MutexLocker x(G1ConcRefine_mon); + while (_cg1r->enabled()) { + MutexUnlocker ux(G1ConcRefine_mon); + ResourceMark rm; + HandleMark hm; + + if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine starting pass"); + _sts.join(); + bool no_sleep = _cg1r->refine(); + _sts.leave(); + if (!no_sleep) { + MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); + // We do this only for the timeout; we don't expect this to be signalled. + CGC_lock->wait(Mutex::_no_safepoint_check_flag, timeout); + } + } +} + +void ConcurrentG1RefineThread::queueBasedRefinement() { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + // Wait for completed log buffers to exist. + { + MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); + while (!_do_traversal && !dcqs.process_completed_buffers() && + !_should_terminate) { + DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag); + } + } + + if (_should_terminate) { + return; + } + + // Now we take them off (this doesn't hold locks while it applies + // closures.) (If we did a full collection, then we'll do a full + // traversal. + _sts.join(); + if (_do_traversal) { + (void)_cg1r->refine(); + switch (_cg1r->get_last_pya()) { + case PYA_cancel: case PYA_continue: + // Continue was caught and handled inside "refine". If it's still + // "continue" when we get here, we're done. + _do_traversal = false; + break; + case PYA_restart: + assert(_do_traversal, "Because of Full GC."); + break; + } + } else { + int n_logs = 0; + int lower_limit = 0; + double start_vtime_sec; // only used when G1SmoothConcRefine is on + int prev_buffer_num; // only used when G1SmoothConcRefine is on + + if (G1SmoothConcRefine) { + lower_limit = 0; + start_vtime_sec = os::elapsedVTime(); + prev_buffer_num = (int) dcqs.completed_buffers_num(); + } else { + lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now. + } + while (dcqs.apply_closure_to_completed_buffer(0, lower_limit)) { + double end_vtime_sec; + double elapsed_vtime_sec; + int elapsed_vtime_ms; + int curr_buffer_num; + + if (G1SmoothConcRefine) { + end_vtime_sec = os::elapsedVTime(); + elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; + elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0); + curr_buffer_num = (int) dcqs.completed_buffers_num(); + + if (curr_buffer_num > prev_buffer_num || + curr_buffer_num > DCQBarrierProcessCompletedThreshold) { + decreaseInterval(elapsed_vtime_ms); + } else if (curr_buffer_num < prev_buffer_num) { + increaseInterval(elapsed_vtime_ms); + } + } + + sample_young_list_rs_lengths(); + _co_tracker.update(false); + + if (G1SmoothConcRefine) { + start_vtime_sec = os::elapsedVTime(); + prev_buffer_num = curr_buffer_num; + + _sts.leave(); + os::sleep(Thread::current(), (jlong) _interval_ms, false); + _sts.join(); + } + + n_logs++; + } + // Make sure we harvest the PYA, if any. 
+ (void)_cg1r->get_pya(); + } + _sts.leave(); +} + +void ConcurrentG1RefineThread::sample_young_list_rs_lengths() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + G1CollectorPolicy* g1p = g1h->g1_policy(); + if (g1p->adaptive_young_list_length()) { + int regions_visited = 0; + + g1h->young_list_rs_length_sampling_init(); + while (g1h->young_list_rs_length_sampling_more()) { + g1h->young_list_rs_length_sampling_next(); + ++regions_visited; + + // we try to yield every time we visit 10 regions + if (regions_visited == 10) { + if (_sts.should_yield()) { + _sts.yield("G1 refine"); + // we just abandon the iteration + break; + } + regions_visited = 0; + } + } + + g1p->check_prediction_validity(); + } +} + +void ConcurrentG1RefineThread::run() { + initialize_in_thread(); + _vtime_start = os::elapsedVTime(); + wait_for_universe_init(); + + _co_tracker.enable(); + _co_tracker.start(); + + while (!_should_terminate) { + // wait until started is set. + if (G1RSBarrierUseQueue) { + queueBasedRefinement(); + } else { + traversalBasedRefinement(); + } + _sts.join(); + _co_tracker.update(); + _sts.leave(); + if (os::supports_vtime()) { + _vtime_accum = (os::elapsedVTime() - _vtime_start); + } else { + _vtime_accum = 0.0; + } + } + _sts.join(); + _co_tracker.update(true); + _sts.leave(); + assert(_should_terminate, "just checking"); + + terminate(); +} + + +void ConcurrentG1RefineThread::yield() { + if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-yield"); + _sts.yield("G1 refine"); + if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-yield-end"); +} + +void ConcurrentG1RefineThread::stop() { + // it is ok to take late safepoints here, if needed + { + MutexLockerEx mu(Terminator_lock); + _should_terminate = true; + } + + { + MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); + DirtyCardQ_CBL_mon->notify_all(); + } + + { + MutexLockerEx mu(Terminator_lock); + while (!_has_terminated) { + Terminator_lock->wait(); + } + } + if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-stop"); +} + +void ConcurrentG1RefineThread::print() { + gclog_or_tty->print("\"Concurrent G1 Refinement Thread\" "); + Thread::print(); + gclog_or_tty->cr(); +} + +void ConcurrentG1RefineThread::set_do_traversal(bool b) { + _do_traversal = b; +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,104 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Forward Decl. +class ConcurrentG1Refine; + +// The G1 Concurrent Refinement Thread (could be several in the future). + +class ConcurrentG1RefineThread: public ConcurrentGCThread { + friend class VMStructs; + friend class G1CollectedHeap; + + double _vtime_start; // Initial virtual time. + double _vtime_accum; // Initial virtual time. + + public: + virtual void run(); + + private: + ConcurrentG1Refine* _cg1r; + bool _started; + bool _in_progress; + volatile bool _restart; + + COTracker _co_tracker; + double _interval_ms; + + bool _do_traversal; + + void decreaseInterval(int processing_time_ms) { + double min_interval_ms = (double) processing_time_ms; + _interval_ms = 0.8 * _interval_ms; + if (_interval_ms < min_interval_ms) + _interval_ms = min_interval_ms; + } + void increaseInterval(int processing_time_ms) { + double max_interval_ms = 9.0 * (double) processing_time_ms; + _interval_ms = 1.1 * _interval_ms; + if (max_interval_ms > 0 && _interval_ms > max_interval_ms) + _interval_ms = max_interval_ms; + } + + void sleepBeforeNextCycle(); + + void traversalBasedRefinement(); + + void queueBasedRefinement(); + + // For use by G1CollectedHeap, which is a friend. + static SuspendibleThreadSet* sts() { return &_sts; } + + public: + // Constructor + ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r); + + // Printing + void print(); + + // Total virtual time so far. + double vtime_accum() { return _vtime_accum; } + + ConcurrentG1Refine* cg1r() { return _cg1r; } + + + void set_started() { _started = true; } + void clear_started() { _started = false; } + bool started() { return _started; } + + void set_in_progress() { _in_progress = true; } + void clear_in_progress() { _in_progress = false; } + bool in_progress() { return _in_progress; } + + void set_do_traversal(bool b); + bool do_traversal() { return _do_traversal; } + + void sample_young_list_rs_lengths(); + + // Yield for GC + void yield(); + + // shutdown + static void stop(); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentMark.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,3957 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_concurrentMark.cpp.incl" + +// +// CMS Bit Map Wrapper + +CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter): + _bm((uintptr_t*)NULL,0), + _shifter(shifter) { + _bmStartWord = (HeapWord*)(rs.base()); + _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes + ReservedSpace brs(ReservedSpace::allocation_align_size_up( + (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1)); + + guarantee(brs.is_reserved(), "couldn't allocate CMS bit map"); + // For now we'll just commit all of the bit map up fromt. + // Later on we'll try to be more parsimonious with swap. + guarantee(_virtual_space.initialize(brs, brs.size()), + "couldn't reseve backing store for CMS bit map"); + assert(_virtual_space.committed_size() == brs.size(), + "didn't reserve backing store for all of CMS bit map?"); + _bm.set_map((uintptr_t*)_virtual_space.low()); + assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= + _bmWordSize, "inconsistency in bit map sizing"); + _bm.set_size(_bmWordSize >> _shifter); +} + +HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr, + HeapWord* limit) const { + // First we must round addr *up* to a possible object boundary. + addr = (HeapWord*)align_size_up((intptr_t)addr, + HeapWordSize << _shifter); + size_t addrOffset = heapWordToOffset(addr); + if (limit == NULL) limit = _bmStartWord + _bmWordSize; + size_t limitOffset = heapWordToOffset(limit); + size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); + HeapWord* nextAddr = offsetToHeapWord(nextOffset); + assert(nextAddr >= addr, "get_next_one postcondition"); + assert(nextAddr == limit || isMarked(nextAddr), + "get_next_one postcondition"); + return nextAddr; +} + +HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr, + HeapWord* limit) const { + size_t addrOffset = heapWordToOffset(addr); + if (limit == NULL) limit = _bmStartWord + _bmWordSize; + size_t limitOffset = heapWordToOffset(limit); + size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset); + HeapWord* nextAddr = offsetToHeapWord(nextOffset); + assert(nextAddr >= addr, "get_next_one postcondition"); + assert(nextAddr == limit || !isMarked(nextAddr), + "get_next_one postcondition"); + return nextAddr; +} + +int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const { + assert((diff & ((1 << _shifter) - 1)) == 0, "argument check"); + return (int) (diff >> _shifter); +} + +bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) { + HeapWord* left = MAX2(_bmStartWord, mr.start()); + HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end()); + if (right > left) { + // Right-open interval [leftOffset, rightOffset). 
+ return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right)); + } else { + return true; + } +} + +void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap, + size_t from_start_index, + HeapWord* to_start_word, + size_t word_num) { + _bm.mostly_disjoint_range_union(from_bitmap, + from_start_index, + heapWordToOffset(to_start_word), + word_num); +} + +#ifndef PRODUCT +bool CMBitMapRO::covers(ReservedSpace rs) const { + // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); + assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize, + "size inconsistency"); + return _bmStartWord == (HeapWord*)(rs.base()) && + _bmWordSize == rs.size()>>LogHeapWordSize; +} +#endif + +void CMBitMap::clearAll() { + _bm.clear(); + return; +} + +void CMBitMap::markRange(MemRegion mr) { + mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); + assert(!mr.is_empty(), "unexpected empty region"); + assert((offsetToHeapWord(heapWordToOffset(mr.end())) == + ((HeapWord *) mr.end())), + "markRange memory region end is not card aligned"); + // convert address range into offset range + _bm.at_put_range(heapWordToOffset(mr.start()), + heapWordToOffset(mr.end()), true); +} + +void CMBitMap::clearRange(MemRegion mr) { + mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); + assert(!mr.is_empty(), "unexpected empty region"); + // convert address range into offset range + _bm.at_put_range(heapWordToOffset(mr.start()), + heapWordToOffset(mr.end()), false); +} + +MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr, + HeapWord* end_addr) { + HeapWord* start = getNextMarkedWordAddress(addr); + start = MIN2(start, end_addr); + HeapWord* end = getNextUnmarkedWordAddress(start); + end = MIN2(end, end_addr); + assert(start <= end, "Consistency check"); + MemRegion mr(start, end); + if (!mr.is_empty()) { + clearRange(mr); + } + return mr; +} + +CMMarkStack::CMMarkStack(ConcurrentMark* cm) : + _base(NULL), _cm(cm) +#ifdef ASSERT + , _drain_in_progress(false) + , _drain_in_progress_yields(false) +#endif +{} + +void CMMarkStack::allocate(size_t size) { + _base = NEW_C_HEAP_ARRAY(oop, size); + if (_base == NULL) + vm_exit_during_initialization("Failed to allocate " + "CM region mark stack"); + _index = 0; + // QQQQ cast ... + _capacity = (jint) size; + _oops_do_bound = -1; + NOT_PRODUCT(_max_depth = 0); +} + +CMMarkStack::~CMMarkStack() { + if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base); +} + +void CMMarkStack::par_push(oop ptr) { + while (true) { + if (isFull()) { + _overflow = true; + return; + } + // Otherwise... + jint index = _index; + jint next_index = index+1; + jint res = Atomic::cmpxchg(next_index, &_index, index); + if (res == index) { + _base[index] = ptr; + // Note that we don't maintain this atomically. We could, but it + // doesn't seem necessary. + NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); + return; + } + // Otherwise, we need to try again. + } +} + +void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) { + while (true) { + if (isFull()) { + _overflow = true; + return; + } + // Otherwise... + jint index = _index; + jint next_index = index + n; + if (next_index > _capacity) { + _overflow = true; + return; + } + jint res = Atomic::cmpxchg(next_index, &_index, index); + if (res == index) { + for (int i = 0; i < n; i++) { + int ind = index + i; + assert(ind < _capacity, "By overflow test above."); + _base[ind] = ptr_arr[i]; + } + NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); + return; + } + // Otherwise, we need to try again. 
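The retry loop above (and the one in par_push) follows a claim-then-publish pattern: a CAS on _index reserves a slot, and only the winning thread writes into the slot it claimed. The sketch below restates that pattern with std::atomic in place of the VM's Atomic::cmpxchg; it is illustrative only and all names are local to the sketch.

#include <atomic>

template <typename T, int Capacity>
class ParStackSketch {
  T                _base[Capacity];
  std::atomic<int> _index;     // number of claimed slots
  bool             _overflow;
public:
  ParStackSketch() : _index(0), _overflow(false) {}

  void par_push(T value) {
    while (true) {
      int index = _index.load();
      if (index >= Capacity) {           // full: record overflow and give up
        _overflow = true;
        return;
      }
      // Claim slot 'index' by advancing _index with a CAS; only the winner
      // writes into the slot it claimed.
      if (_index.compare_exchange_weak(index, index + 1)) {
        _base[index] = value;
        return;
      }
      // CAS failed: another thread claimed the slot first, so retry.
    }
  }
};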
+ } +} + + +void CMMarkStack::par_push_arr(oop* ptr_arr, int n) { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + jint start = _index; + jint next_index = start + n; + if (next_index > _capacity) { + _overflow = true; + return; + } + // Otherwise. + _index = next_index; + for (int i = 0; i < n; i++) { + int ind = start + i; + guarantee(ind < _capacity, "By overflow test above."); + _base[ind] = ptr_arr[i]; + } +} + + +bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + jint index = _index; + if (index == 0) { + *n = 0; + return false; + } else { + int k = MIN2(max, index); + jint new_ind = index - k; + for (int j = 0; j < k; j++) { + ptr_arr[j] = _base[new_ind + j]; + } + _index = new_ind; + *n = k; + return true; + } +} + + +CMRegionStack::CMRegionStack() : _base(NULL) {} + +void CMRegionStack::allocate(size_t size) { + _base = NEW_C_HEAP_ARRAY(MemRegion, size); + if (_base == NULL) + vm_exit_during_initialization("Failed to allocate " + "CM region mark stack"); + _index = 0; + // QQQQ cast ... + _capacity = (jint) size; +} + +CMRegionStack::~CMRegionStack() { + if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base); +} + +void CMRegionStack::push(MemRegion mr) { + assert(mr.word_size() > 0, "Precondition"); + while (true) { + if (isFull()) { + _overflow = true; + return; + } + // Otherwise... + jint index = _index; + jint next_index = index+1; + jint res = Atomic::cmpxchg(next_index, &_index, index); + if (res == index) { + _base[index] = mr; + return; + } + // Otherwise, we need to try again. + } +} + +MemRegion CMRegionStack::pop() { + while (true) { + // Otherwise... + jint index = _index; + + if (index == 0) { + return MemRegion(); + } + jint next_index = index-1; + jint res = Atomic::cmpxchg(next_index, &_index, index); + if (res == index) { + MemRegion mr = _base[next_index]; + if (mr.start() != NULL) { + tmp_guarantee_CM( mr.end() != NULL, "invariant" ); + tmp_guarantee_CM( mr.word_size() > 0, "invariant" ); + return mr; + } else { + // that entry was invalidated... let's skip it + tmp_guarantee_CM( mr.end() == NULL, "invariant" ); + } + } + // Otherwise, we need to try again. + } +} + +bool CMRegionStack::invalidate_entries_into_cset() { + bool result = false; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + for (int i = 0; i < _oops_do_bound; ++i) { + MemRegion mr = _base[i]; + if (mr.start() != NULL) { + tmp_guarantee_CM( mr.end() != NULL, "invariant"); + tmp_guarantee_CM( mr.word_size() > 0, "invariant" ); + HeapRegion* hr = g1h->heap_region_containing(mr.start()); + tmp_guarantee_CM( hr != NULL, "invariant" ); + if (hr->in_collection_set()) { + // The region points into the collection set + _base[i] = MemRegion(); + result = true; + } + } else { + // that entry was invalidated... 
let's skip it + tmp_guarantee_CM( mr.end() == NULL, "invariant" ); + } + } + return result; +} + +template +bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) { + assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after + || SafepointSynchronize::is_at_safepoint(), + "Drain recursion must be yield-safe."); + bool res = true; + debug_only(_drain_in_progress = true); + debug_only(_drain_in_progress_yields = yield_after); + while (!isEmpty()) { + oop newOop = pop(); + assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop"); + assert(newOop->is_oop(), "Expected an oop"); + assert(bm == NULL || bm->isMarked((HeapWord*)newOop), + "only grey objects on this stack"); + // iterate over the oops in this oop, marking and pushing + // the ones in CMS generation. + newOop->oop_iterate(cl); + if (yield_after && _cm->do_yield_check()) { + res = false; break; + } + } + debug_only(_drain_in_progress = false); + return res; +} + +void CMMarkStack::oops_do(OopClosure* f) { + if (_index == 0) return; + assert(_oops_do_bound != -1 && _oops_do_bound <= _index, + "Bound must be set."); + for (int i = 0; i < _oops_do_bound; i++) { + f->do_oop(&_base[i]); + } + _oops_do_bound = -1; +} + +bool ConcurrentMark::not_yet_marked(oop obj) const { + return (_g1h->is_obj_ill(obj) + || (_g1h->is_in_permanent(obj) + && !nextMarkBitMap()->isMarked((HeapWord*)obj))); +} + +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list +#endif // _MSC_VER + +ConcurrentMark::ConcurrentMark(ReservedSpace rs, + int max_regions) : + _markBitMap1(rs, MinObjAlignment - 1), + _markBitMap2(rs, MinObjAlignment - 1), + + _parallel_marking_threads(0), + _sleep_factor(0.0), + _marking_task_overhead(1.0), + _cleanup_sleep_factor(0.0), + _cleanup_task_overhead(1.0), + _region_bm(max_regions, false /* in_resource_area*/), + _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >> + CardTableModRefBS::card_shift, + false /* in_resource_area*/), + _prevMarkBitMap(&_markBitMap1), + _nextMarkBitMap(&_markBitMap2), + _at_least_one_mark_complete(false), + + _markStack(this), + _regionStack(), + // _finger set in set_non_marking_state + + _max_task_num(MAX2(ParallelGCThreads, (size_t)1)), + // _active_tasks set in set_non_marking_state + // _tasks set inside the constructor + _task_queues(new CMTaskQueueSet((int) _max_task_num)), + _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)), + + _has_overflown(false), + _concurrent(false), + + // _verbose_level set below + + _init_times(), + _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), + _cleanup_times(), + _total_counting_time(0.0), + _total_rs_scrub_time(0.0), + + _parallel_workers(NULL), + _cleanup_co_tracker(G1CLGroup) +{ + CMVerboseLevel verbose_level = + (CMVerboseLevel) G1MarkingVerboseLevel; + if (verbose_level < no_verbose) + verbose_level = no_verbose; + if (verbose_level > high_verbose) + verbose_level = high_verbose; + _verbose_level = verbose_level; + + if (verbose_low()) + gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", " + "heap end = "PTR_FORMAT, _heap_start, _heap_end); + + _markStack.allocate(G1CMStackSize); + _regionStack.allocate(G1CMRegionStackSize); + + // Create & start a ConcurrentMark thread. 
+ if (G1ConcMark) { + _cmThread = new ConcurrentMarkThread(this); + assert(cmThread() != NULL, "CM Thread should have been created"); + assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); + } else { + _cmThread = NULL; + } + _g1h = G1CollectedHeap::heap(); + assert(CGC_lock != NULL, "Where's the CGC_lock?"); + assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency"); + assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency"); + + SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); + satb_qs.set_buffer_size(G1SATBLogBufferSize); + + int size = (int) MAX2(ParallelGCThreads, (size_t)1); + _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size); + for (int i = 0 ; i < size; i++) { + _par_cleanup_thread_state[i] = new ParCleanupThreadState; + } + + _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num); + _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num); + + // so that the assertion in MarkingTaskQueue::task_queue doesn't fail + _active_tasks = _max_task_num; + for (int i = 0; i < (int) _max_task_num; ++i) { + CMTaskQueue* task_queue = new CMTaskQueue(); + task_queue->initialize(); + _task_queues->register_queue(i, task_queue); + + _tasks[i] = new CMTask(i, this, task_queue, _task_queues); + _accum_task_vtime[i] = 0.0; + } + + if (ParallelMarkingThreads > ParallelGCThreads) { + vm_exit_during_initialization("Can't have more ParallelMarkingThreads " + "than ParallelGCThreads."); + } + if (ParallelGCThreads == 0) { + // if we are not running with any parallel GC threads we will not + // spawn any marking threads either + _parallel_marking_threads = 0; + _sleep_factor = 0.0; + _marking_task_overhead = 1.0; + } else { + if (ParallelMarkingThreads > 0) { + // notice that ParallelMarkingThreads overwrites G1MarkingOverheadPerc + // if both are set + + _parallel_marking_threads = ParallelMarkingThreads; + _sleep_factor = 0.0; + _marking_task_overhead = 1.0; + } else if (G1MarkingOverheadPerc > 0) { + // we will calculate the number of parallel marking threads + // based on a target overhead with respect to the soft real-time + // goal + + double marking_overhead = (double) G1MarkingOverheadPerc / 100.0; + double overall_cm_overhead = + (double) G1MaxPauseTimeMS * marking_overhead / (double) G1TimeSliceMS; + double cpu_ratio = 1.0 / (double) os::processor_count(); + double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); + double marking_task_overhead = + overall_cm_overhead / marking_thread_num * + (double) os::processor_count(); + double sleep_factor = + (1.0 - marking_task_overhead) / marking_task_overhead; + + _parallel_marking_threads = (size_t) marking_thread_num; + _sleep_factor = sleep_factor; + _marking_task_overhead = marking_task_overhead; + } else { + _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1); + _sleep_factor = 0.0; + _marking_task_overhead = 1.0; + } + + if (parallel_marking_threads() > 1) + _cleanup_task_overhead = 1.0; + else + _cleanup_task_overhead = marking_task_overhead(); + _cleanup_sleep_factor = + (1.0 - cleanup_task_overhead()) / cleanup_task_overhead(); + +#if 0 + gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads()); + gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead()); + gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor()); + gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead()); + gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor()); +#endif + + guarantee( 
parallel_marking_threads() > 0, "peace of mind" ); + _parallel_workers = new WorkGang("Parallel Marking Threads", + (int) parallel_marking_threads(), false, true); + if (_parallel_workers == NULL) + vm_exit_during_initialization("Failed necessary allocation."); + } + + // so that the call below can read a sensible value + _heap_start = (HeapWord*) rs.base(); + set_non_marking_state(); +} + +void ConcurrentMark::update_g1_committed(bool force) { + // If concurrent marking is not in progress, then we do not need to + // update _heap_end. This has a subtle and important + // side-effect. Imagine that two evacuation pauses happen between + // marking completion and remark. The first one can grow the + // heap (hence now the finger is below the heap end). Then, the + // second one could unnecessarily push regions on the region + // stack. This causes the invariant that the region stack is empty + // at the beginning of remark to be false. By ensuring that we do + // not observe heap expansions after marking is complete, then we do + // not have this problem. + if (!concurrent_marking_in_progress() && !force) + return; + + MemRegion committed = _g1h->g1_committed(); + tmp_guarantee_CM( committed.start() == _heap_start, + "start shouldn't change" ); + HeapWord* new_end = committed.end(); + if (new_end > _heap_end) { + // The heap has been expanded. + + _heap_end = new_end; + } + // Notice that the heap can also shrink. However, this only happens + // during a Full GC (at least currently) and the entire marking + // phase will bail out and the task will not be restarted. So, let's + // do nothing. +} + +void ConcurrentMark::reset() { + // Starting values for these two. This should be called in a STW + // phase. CM will be notified of any future g1_committed expansions + // will be at the end of evacuation pauses, when tasks are + // inactive. + MemRegion committed = _g1h->g1_committed(); + _heap_start = committed.start(); + _heap_end = committed.end(); + + guarantee( _heap_start != NULL && + _heap_end != NULL && + _heap_start < _heap_end, "heap bounds should look ok" ); + + // reset all the marking data structures and any necessary flags + clear_marking_state(); + + if (verbose_low()) + gclog_or_tty->print_cr("[global] resetting"); + + // We do reset all of them, since different phases will use + // different number of active threads. So, it's easiest to have all + // of them ready. + for (int i = 0; i < (int) _max_task_num; ++i) + _tasks[i]->reset(_nextMarkBitMap); + + // we need this to make sure that the flag is on during the evac + // pause with initial mark piggy-backed + set_concurrent_marking_in_progress(); +} + +void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) { + guarantee( active_tasks <= _max_task_num, "we should not have more" ); + + _active_tasks = active_tasks; + // Need to update the three data structures below according to the + // number of active threads for this phase. + _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); + _first_overflow_barrier_sync.set_n_workers((int) active_tasks); + _second_overflow_barrier_sync.set_n_workers((int) active_tasks); + + _concurrent = concurrent; + // We propagate this to all tasks, not just the active ones. + for (int i = 0; i < (int) _max_task_num; ++i) + _tasks[i]->set_concurrent(concurrent); + + if (concurrent) { + set_concurrent_marking_in_progress(); + } else { + // We currently assume that the concurrent flag has been set to + // false before we start remark. 
At this point we should also be + // in a STW phase. + guarantee( !concurrent_marking_in_progress(), "invariant" ); + guarantee( _finger == _heap_end, "only way to get here" ); + update_g1_committed(true); + } +} + +void ConcurrentMark::set_non_marking_state() { + // We set the global marking state to some default values when we're + // not doing marking. + clear_marking_state(); + _active_tasks = 0; + clear_concurrent_marking_in_progress(); +} + +ConcurrentMark::~ConcurrentMark() { + int size = (int) MAX2(ParallelGCThreads, (size_t)1); + for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i]; + FREE_C_HEAP_ARRAY(ParCleanupThreadState*, + _par_cleanup_thread_state); + + for (int i = 0; i < (int) _max_task_num; ++i) { + delete _task_queues->queue(i); + delete _tasks[i]; + } + delete _task_queues; + FREE_C_HEAP_ARRAY(CMTask*, _max_task_num); +} + +// This closure is used to mark refs into the g1 generation +// from external roots in the CMS bit map. +// Called at the first checkpoint. +// + +#define PRINT_REACHABLE_AT_INITIAL_MARK 0 +#if PRINT_REACHABLE_AT_INITIAL_MARK +static FILE* reachable_file = NULL; + +class PrintReachableClosure: public OopsInGenClosure { + CMBitMap* _bm; + int _level; +public: + PrintReachableClosure(CMBitMap* bm) : + _bm(bm), _level(0) { + guarantee(reachable_file != NULL, "pre-condition"); + } + void do_oop(oop* p) { + oop obj = *p; + HeapWord* obj_addr = (HeapWord*)obj; + if (obj == NULL) return; + fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n", + _level, p, (void*) obj, _bm->isMarked(obj_addr)); + if (!_bm->isMarked(obj_addr)) { + _bm->mark(obj_addr); + _level++; + obj->oop_iterate(this); + _level--; + } + } +}; +#endif // PRINT_REACHABLE_AT_INITIAL_MARK + +#define SEND_HEAP_DUMP_TO_FILE 0 +#if SEND_HEAP_DUMP_TO_FILE +static FILE* heap_dump_file = NULL; +#endif // SEND_HEAP_DUMP_TO_FILE + +void ConcurrentMark::clearNextBitmap() { + guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition."); + + // clear the mark bitmap (no grey objects to start with). + // We need to do this in chunks and offer to yield in between + // each chunk. + HeapWord* start = _nextMarkBitMap->startWord(); + HeapWord* end = _nextMarkBitMap->endWord(); + HeapWord* cur = start; + size_t chunkSize = M; + while (cur < end) { + HeapWord* next = cur + chunkSize; + if (next > end) + next = end; + MemRegion mr(cur,next); + _nextMarkBitMap->clearRange(mr); + cur = next; + do_yield_check(); + } +} + +class NoteStartOfMarkHRClosure: public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion* r) { + if (!r->continuesHumongous()) { + r->note_start_of_marking(true); + } + return false; + } +}; + +void ConcurrentMark::checkpointRootsInitialPre() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + G1CollectorPolicy* g1p = g1h->g1_policy(); + + _has_aborted = false; + + // Find all the reachable objects... +#if PRINT_REACHABLE_AT_INITIAL_MARK + guarantee(reachable_file == NULL, "Protocol"); + char fn_buf[100]; + sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id()); + reachable_file = fopen(fn_buf, "w"); + // clear the mark bitmap (no grey objects to start with) + _nextMarkBitMap->clearAll(); + PrintReachableClosure prcl(_nextMarkBitMap); + g1h->process_strong_roots( + false, // fake perm gen collection + SharedHeap::SO_AllClasses, + &prcl, // Regular roots + &prcl // Perm Gen Roots + ); + // The root iteration above "consumed" dirty cards in the perm gen. + // Therefore, as a shortcut, we dirty all such cards. 
+ g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false); + fclose(reachable_file); + reachable_file = NULL; + // clear the mark bitmap again. + _nextMarkBitMap->clearAll(); + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); + COMPILER2_PRESENT(DerivedPointerTable::clear()); +#endif // PRINT_REACHABLE_AT_INITIAL_MARK + + // Initialise marking structures. This has to be done in a STW phase. + reset(); +} + +class CMMarkRootsClosure: public OopsInGenClosure { +private: + ConcurrentMark* _cm; + G1CollectedHeap* _g1h; + bool _do_barrier; + +public: + CMMarkRootsClosure(ConcurrentMark* cm, + G1CollectedHeap* g1h, + bool do_barrier) : _cm(cm), _g1h(g1h), + _do_barrier(do_barrier) { } + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + virtual void do_oop(oop* p) { + oop thisOop = *p; + if (thisOop != NULL) { + assert(thisOop->is_oop() || thisOop->mark() == NULL, + "expected an oop, possibly with mark word displaced"); + HeapWord* addr = (HeapWord*)thisOop; + if (_g1h->is_in_g1_reserved(addr)) { + _cm->grayRoot(thisOop); + } + } + if (_do_barrier) { + assert(!_g1h->is_in_g1_reserved(p), + "Should be called on external roots"); + do_barrier(p); + } + } +}; + +void ConcurrentMark::checkpointRootsInitialPost() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // For each region note start of marking. + NoteStartOfMarkHRClosure startcl; + g1h->heap_region_iterate(&startcl); + + // Start weak-reference discovery. + ReferenceProcessor* rp = g1h->ref_processor(); + rp->verify_no_references_recorded(); + rp->enable_discovery(); // enable ("weak") refs discovery + + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold); + satb_mq_set.set_active_all_threads(true); + + // update_g1_committed() will be called at the end of an evac pause + // when marking is on. So, it's also called at the end of the + // initial-mark pause to update the heap end, if the heap expands + // during it. No need to call it here. + + guarantee( !_cleanup_co_tracker.enabled(), "invariant" ); + + size_t max_marking_threads = + MAX2((size_t) 1, parallel_marking_threads()); + for (int i = 0; i < (int)_max_task_num; ++i) { + _tasks[i]->enable_co_tracker(); + if (i < (int) max_marking_threads) + _tasks[i]->reset_co_tracker(marking_task_overhead()); + else + _tasks[i]->reset_co_tracker(0.0); + } +} + +// Checkpoint the roots into this generation from outside +// this generation. [Note this initial checkpoint need only +// be approximate -- we'll do a catch up phase subsequently.] +void ConcurrentMark::checkpointRootsInitial() { + assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped"); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + double start = os::elapsedTime(); + GCOverheadReporter::recordSTWStart(start); + + // If there has not been a GC[n-1] since last GC[n] cycle completed, + // precede our marking with a collection of all + // younger generations to keep floating garbage to a minimum. + // YSR: we won't do this for now -- it's an optimization to be + // done post-beta. 
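checkpointRootsInitialPost() above turns on SATB (snapshot-at-the-beginning) buffering for all Java threads via satb_mq_set.set_active_all_threads(true). The following is a minimal standalone sketch of the idea behind an SATB pre-write barrier, not the HotSpot barrier itself; the types Obj and SatbQueue and the functions satb_pre_write()/reference_store() are invented for the illustration.

#include <vector>

// Snapshot-at-the-beginning in one sentence: before a reference field is
// overwritten while marking is active, the *old* value is recorded so the
// concurrent marker can still trace the object graph as it existed when
// marking started.  Everything below is illustrative only.
struct Obj { Obj* field = nullptr; };

struct SatbQueue {
  std::vector<Obj*> buffer;                 // stand-in for a per-thread SATB buffer
  void enqueue(Obj* old_value) {
    if (old_value != nullptr) buffer.push_back(old_value);
  }
};

static bool marking_active = false;         // true between initial mark and remark
static thread_local SatbQueue satb_queue;   // the patch keeps these per JavaThread

// Pre-write barrier: called just before "*field = new_value".
inline void satb_pre_write(Obj** field) {
  if (marking_active) satb_queue.enqueue(*field);
}

inline void reference_store(Obj** field, Obj* new_value) {
  satb_pre_write(field);
  *field = new_value;                       // the actual store
}

int main() {
  Obj a, b, holder;
  holder.field = &a;
  marking_active = true;
  reference_store(&holder.field, &b);       // the old target (&a) is enqueued
  return (int) satb_queue.buffer.size();    // 1
}

In the patch the buffers live in each JavaThread's SATB mark queue set and are drained via drainAllSATBBuffers() further below.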
+ + // YSR: ignoring weak refs for now; will do at bug fixing stage + // EVM: assert(discoveredRefsAreClear()); + + + G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); + g1p->record_concurrent_mark_init_start(); + checkpointRootsInitialPre(); + + // YSR: when concurrent precleaning is in place, we'll + // need to clear the cached card table here + + ResourceMark rm; + HandleMark hm; + + g1h->ensure_parsability(false); + g1h->perm_gen()->save_marks(); + + CMMarkRootsClosure notOlder(this, g1h, false); + CMMarkRootsClosure older(this, g1h, true); + + g1h->set_marking_started(); + g1h->rem_set()->prepare_for_younger_refs_iterate(false); + + g1h->process_strong_roots(false, // fake perm gen collection + SharedHeap::SO_AllClasses, + ¬Older, // Regular roots + &older // Perm Gen Roots + ); + checkpointRootsInitialPost(); + + // Statistics. + double end = os::elapsedTime(); + _init_times.add((end - start) * 1000.0); + GCOverheadReporter::recordSTWEnd(end); + + g1p->record_concurrent_mark_init_end(); +} + +/* + Notice that in the next two methods, we actually leave the STS + during the barrier sync and join it immediately afterwards. If we + do not do this, this then the following deadlock can occur: one + thread could be in the barrier sync code, waiting for the other + thread to also sync up, whereas another one could be trying to + yield, while also waiting for the other threads to sync up too. + + Because the thread that does the sync barrier has left the STS, it + is possible to be suspended for a Full GC or an evacuation pause + could occur. This is actually safe, since the entering the sync + barrier is one of the last things do_marking_step() does, and it + doesn't manipulate any data structures afterwards. +*/ + +void ConcurrentMark::enter_first_sync_barrier(int task_num) { + if (verbose_low()) + gclog_or_tty->print_cr("[%d] entering first barrier", task_num); + + ConcurrentGCThread::stsLeave(); + _first_overflow_barrier_sync.enter(); + ConcurrentGCThread::stsJoin(); + // at this point everyone should have synced up and not be doing any + // more work + + if (verbose_low()) + gclog_or_tty->print_cr("[%d] leaving first barrier", task_num); + + // let task 0 do this + if (task_num == 0) { + // task 0 is responsible for clearing the global data structures + clear_marking_state(); + + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]"); + } + } + + // after this, each task should reset its own data structures then + // then go into the second barrier +} + +void ConcurrentMark::enter_second_sync_barrier(int task_num) { + if (verbose_low()) + gclog_or_tty->print_cr("[%d] entering second barrier", task_num); + + ConcurrentGCThread::stsLeave(); + _second_overflow_barrier_sync.enter(); + ConcurrentGCThread::stsJoin(); + // at this point everything should be re-initialised and ready to go + + if (verbose_low()) + gclog_or_tty->print_cr("[%d] leaving second barrier", task_num); +} + +void ConcurrentMark::grayRoot(oop p) { + HeapWord* addr = (HeapWord*) p; + // We can't really check against _heap_start and _heap_end, since it + // is possible during an evacuation pause with piggy-backed + // initial-mark that the committed space is expanded during the + // pause without CM observing this change. So the assertions below + // is a bit conservative; but better than nothing. 
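The two overflow barriers used by enter_first_sync_barrier()/enter_second_sync_barrier() above implement a restart protocol: all tasks rendezvous, task 0 clears the global marking state, every task resets its local state, and a second rendezvous guarantees nobody restarts early. A rough standalone sketch of that shape using C++20 std::barrier follows; the thread bodies and counts are invented and this is not the HotSpot synchronisation code.

#include <barrier>
#include <cstdio>
#include <thread>
#include <vector>

// Illustration of the two-barrier overflow restart described above.  Task 0
// clears shared marking state between the barriers; the second barrier keeps
// any task from restarting before that reset has finished.
int main() {
  const int n_tasks = 4;
  std::barrier<> first_sync(n_tasks);
  std::barrier<> second_sync(n_tasks);

  auto task = [&](int id) {
    // ... this task noticed (or was told about) a global mark stack overflow ...
    first_sync.arrive_and_wait();        // everyone has stopped marking
    if (id == 0) {
      std::printf("[task 0] clearing global marking state\n");
    }
    // each task re-initialises its own local structures here
    second_sync.arrive_and_wait();       // reset is complete for everyone
    std::printf("[task %d] restarting marking\n", id);
  };

  std::vector<std::thread> threads;
  for (int i = 0; i < n_tasks; i++) threads.emplace_back(task, i);
  for (std::thread& t : threads) t.join();
  return 0;
}

The extra wrinkle in the patch is that each thread leaves the suspendible thread set (stsLeave()) before blocking on the barrier, so a safepoint or Full GC is not held up by tasks parked here.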
+ tmp_guarantee_CM( _g1h->g1_committed().contains(addr), + "address should be within the heap bounds" ); + + if (!_nextMarkBitMap->isMarked(addr)) + _nextMarkBitMap->parMark(addr); +} + +void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) { + // The objects on the region have already been marked "in bulk" by + // the caller. We only need to decide whether to push the region on + // the region stack or not. + + if (!concurrent_marking_in_progress() || !_should_gray_objects) + // We're done with marking and waiting for remark. We do not need to + // push anything else on the region stack. + return; + + HeapWord* finger = _finger; + + if (verbose_low()) + gclog_or_tty->print_cr("[global] attempting to push " + "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at " + PTR_FORMAT, mr.start(), mr.end(), finger); + + if (mr.start() < finger) { + // The finger is always heap region aligned and it is not possible + // for mr to span heap regions. + tmp_guarantee_CM( mr.end() <= finger, "invariant" ); + + tmp_guarantee_CM( mr.start() <= mr.end() && + _heap_start <= mr.start() && + mr.end() <= _heap_end, + "region boundaries should fall within the committed space" ); + if (verbose_low()) + gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") " + "below the finger, pushing it", + mr.start(), mr.end()); + + if (!region_stack_push(mr)) { + if (verbose_low()) + gclog_or_tty->print_cr("[global] region stack has overflown."); + } + } +} + +void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) { + // The object is not marked by the caller. We need to at least mark + // it and maybe push in on the stack. + + HeapWord* addr = (HeapWord*)p; + if (!_nextMarkBitMap->isMarked(addr)) { + // We definitely need to mark it, irrespective whether we bail out + // because we're done with marking. + if (_nextMarkBitMap->parMark(addr)) { + if (!concurrent_marking_in_progress() || !_should_gray_objects) + // If we're done with concurrent marking and we're waiting for + // remark, then we're not pushing anything on the stack. + return; + + // No OrderAccess:store_load() is needed. 
It is implicit in the + // CAS done in parMark(addr) above + HeapWord* finger = _finger; + + if (addr < finger) { + if (!mark_stack_push(oop(addr))) { + if (verbose_low()) + gclog_or_tty->print_cr("[global] global stack overflow " + "during parMark"); + } + } + } + } +} + +class CMConcurrentMarkingTask: public AbstractGangTask { +private: + ConcurrentMark* _cm; + ConcurrentMarkThread* _cmt; + +public: + void work(int worker_i) { + guarantee( Thread::current()->is_ConcurrentGC_thread(), + "this should only be done by a conc GC thread" ); + + double start_vtime = os::elapsedVTime(); + + ConcurrentGCThread::stsJoin(); + + guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" ); + CMTask* the_task = _cm->task(worker_i); + the_task->start_co_tracker(); + the_task->record_start_time(); + if (!_cm->has_aborted()) { + do { + double start_vtime_sec = os::elapsedVTime(); + double start_time_sec = os::elapsedTime(); + the_task->do_marking_step(10.0); + double end_time_sec = os::elapsedTime(); + double end_vtime_sec = os::elapsedVTime(); + double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; + double elapsed_time_sec = end_time_sec - start_time_sec; + _cm->clear_has_overflown(); + + bool ret = _cm->do_yield_check(worker_i); + + jlong sleep_time_ms; + if (!_cm->has_aborted() && the_task->has_aborted()) { + sleep_time_ms = + (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); + ConcurrentGCThread::stsLeave(); + os::sleep(Thread::current(), sleep_time_ms, false); + ConcurrentGCThread::stsJoin(); + } + double end_time2_sec = os::elapsedTime(); + double elapsed_time2_sec = end_time2_sec - start_time_sec; + + the_task->update_co_tracker(); + +#if 0 + gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, " + "overhead %1.4lf", + elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, + the_task->conc_overhead(os::elapsedTime()) * 8.0); + gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms", + elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0); +#endif + } while (!_cm->has_aborted() && the_task->has_aborted()); + } + the_task->record_end_time(); + guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" ); + + ConcurrentGCThread::stsLeave(); + + double end_vtime = os::elapsedVTime(); + the_task->update_co_tracker(true); + _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime); + } + + CMConcurrentMarkingTask(ConcurrentMark* cm, + ConcurrentMarkThread* cmt) : + AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } + + ~CMConcurrentMarkingTask() { } +}; + +void ConcurrentMark::markFromRoots() { + // we might be tempted to assert that: + // assert(asynch == !SafepointSynchronize::is_at_safepoint(), + // "inconsistent argument?"); + // However that wouldn't be right, because it's possible that + // a safepoint is indeed in progress as a younger generation + // stop-the-world GC happens even as we mark in this generation. + + _restart_for_overflow = false; + + set_phase(MAX2((size_t) 1, parallel_marking_threads()), true); + + CMConcurrentMarkingTask markingTask(this, cmThread()); + if (parallel_marking_threads() > 0) + _parallel_workers->run_task(&markingTask); + else + markingTask.work(0); + print_stats(); +} + +void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { + // world is stopped at this checkpoint + assert(SafepointSynchronize::is_at_safepoint(), + "world should be stopped"); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // If a full collection has happened, we shouldn't do this. 
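The sleep in CMConcurrentMarkingTask::work() above is sized as elapsed_vtime_sec * sleep_factor, where sleep_factor comes out of the ConcurrentMark constructor when G1MarkingOverheadPerc is the governing knob. The standalone sketch below just restates that arithmetic with made-up input values so the relationship between target overhead, thread count and sleep time is visible in one place; the variable names mirror the constructor but none of this is HotSpot code.

#include <cmath>
#include <cstdio>

// Re-statement of the overhead -> (marking threads, sleep factor) arithmetic
// from the ConcurrentMark constructor.  All input values are invented.
int main() {
  const double marking_overhead_perc = 10.0;   // stand-in for G1MarkingOverheadPerc
  const double max_pause_time_ms     = 50.0;   // stand-in for G1MaxPauseTimeMS
  const double time_slice_ms         = 100.0;  // stand-in for G1TimeSliceMS
  const int    processors            = 8;

  double marking_overhead    = marking_overhead_perc / 100.0;
  // Marking overhead target, scaled by the pause-time / time-slice ratio.
  double overall_cm_overhead = max_pause_time_ms * marking_overhead / time_slice_ms;
  double cpu_ratio           = 1.0 / (double) processors;

  // Number of marking threads needed to spread that overhead over whole CPUs.
  double marking_thread_num  = std::ceil(overall_cm_overhead / cpu_ratio);
  // Fraction of one CPU each marking thread is allowed to consume.
  double marking_task_overhead =
      overall_cm_overhead / marking_thread_num * (double) processors;
  // For every second of marking work, sleep sleep_factor seconds.
  double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;

  // The work loop above then computes:
  double elapsed_vtime_sec = 0.010;            // say, 10 ms of marking just done
  double sleep_time_ms     = elapsed_vtime_sec * sleep_factor * 1000.0;

  std::printf("threads = %.0f, duty cycle = %.3f, sleep factor = %.3f, "
              "sleep after 10 ms of work = %.1f ms\n",
              marking_thread_num, marking_task_overhead, sleep_factor,
              sleep_time_ms);
  return 0;
}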
+ if (has_aborted()) { + g1h->set_marking_complete(); // So bitmap clearing isn't confused + return; + } + + G1CollectorPolicy* g1p = g1h->g1_policy(); + g1p->record_concurrent_mark_remark_start(); + + double start = os::elapsedTime(); + GCOverheadReporter::recordSTWStart(start); + + checkpointRootsFinalWork(); + + double mark_work_end = os::elapsedTime(); + + weakRefsWork(clear_all_soft_refs); + + if (has_overflown()) { + // Oops. We overflowed. Restart concurrent marking. + _restart_for_overflow = true; + // Clear the flag. We do not need it any more. + clear_has_overflown(); + if (G1TraceMarkStackOverflow) + gclog_or_tty->print_cr("\nRemark led to restart for overflow."); + } else { + // We're done with marking. + JavaThread::satb_mark_queue_set().set_active_all_threads(false); + } + +#if VERIFY_OBJS_PROCESSED + _scan_obj_cl.objs_processed = 0; + ThreadLocalObjQueue::objs_enqueued = 0; +#endif + + // Statistics + double now = os::elapsedTime(); + _remark_mark_times.add((mark_work_end - start) * 1000.0); + _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); + _remark_times.add((now - start) * 1000.0); + + GCOverheadReporter::recordSTWEnd(now); + for (int i = 0; i < (int)_max_task_num; ++i) + _tasks[i]->disable_co_tracker(); + _cleanup_co_tracker.enable(); + _cleanup_co_tracker.reset(cleanup_task_overhead()); + g1p->record_concurrent_mark_remark_end(); +} + + +#define CARD_BM_TEST_MODE 0 + +class CalcLiveObjectsClosure: public HeapRegionClosure { + + CMBitMapRO* _bm; + ConcurrentMark* _cm; + COTracker* _co_tracker; + bool _changed; + bool _yield; + size_t _words_done; + size_t _tot_live; + size_t _tot_used; + size_t _regions_done; + double _start_vtime_sec; + + BitMap* _region_bm; + BitMap* _card_bm; + intptr_t _bottom_card_num; + bool _final; + + void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) { + for (intptr_t i = start_card_num; i <= last_card_num; i++) { +#if CARD_BM_TEST_MODE + guarantee(_card_bm->at(i - _bottom_card_num), + "Should already be set."); +#else + _card_bm->par_at_put(i - _bottom_card_num, 1); +#endif + } + } + +public: + CalcLiveObjectsClosure(bool final, + CMBitMapRO *bm, ConcurrentMark *cm, + BitMap* region_bm, BitMap* card_bm, + COTracker* co_tracker) : + _bm(bm), _cm(cm), _changed(false), _yield(true), + _words_done(0), _tot_live(0), _tot_used(0), + _region_bm(region_bm), _card_bm(card_bm), + _final(final), _co_tracker(co_tracker), + _regions_done(0), _start_vtime_sec(0.0) + { + _bottom_card_num = + intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >> + CardTableModRefBS::card_shift); + } + + bool doHeapRegion(HeapRegion* hr) { + if (_co_tracker != NULL) + _co_tracker->update(); + + if (!_final && _regions_done == 0) + _start_vtime_sec = os::elapsedVTime(); + + if (hr->continuesHumongous()) return false; + + HeapWord* nextTop = hr->next_top_at_mark_start(); + HeapWord* start = hr->top_at_conc_mark_count(); + assert(hr->bottom() <= start && start <= hr->end() && + hr->bottom() <= nextTop && nextTop <= hr->end() && + start <= nextTop, + "Preconditions."); + // Otherwise, record the number of word's we'll examine. + size_t words_done = (nextTop - start); + // Find the first marked object at or after "start". + start = _bm->getNextMarkedWordAddress(start, nextTop); + size_t marked_bytes = 0; + + // Below, the term "card num" means the result of shifting an address + // by the card shift -- address 0 corresponds to card number 0. 
One + // must subtract the card num of the bottom of the heap to obtain a + // card table index. + // The first card num of the sequence of live cards currently being + // constructed. -1 ==> no sequence. + intptr_t start_card_num = -1; + // The last card num of the sequence of live cards currently being + // constructed. -1 ==> no sequence. + intptr_t last_card_num = -1; + + while (start < nextTop) { + if (_yield && _cm->do_yield_check()) { + // We yielded. It might be for a full collection, in which case + // all bets are off; terminate the traversal. + if (_cm->has_aborted()) { + _changed = false; + return true; + } else { + // Otherwise, it might be a collection pause, and the region + // we're looking at might be in the collection set. We'll + // abandon this region. + return false; + } + } + oop obj = oop(start); + int obj_sz = obj->size(); + // The card num of the start of the current object. + intptr_t obj_card_num = + intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); + + HeapWord* obj_last = start + obj_sz - 1; + intptr_t obj_last_card_num = + intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift); + + if (obj_card_num != last_card_num) { + if (start_card_num == -1) { + assert(last_card_num == -1, "Both or neither."); + start_card_num = obj_card_num; + } else { + assert(last_card_num != -1, "Both or neither."); + assert(obj_card_num >= last_card_num, "Inv"); + if ((obj_card_num - last_card_num) > 1) { + // Mark the last run, and start a new one. + mark_card_num_range(start_card_num, last_card_num); + start_card_num = obj_card_num; + } + } +#if CARD_BM_TEST_MODE + /* + gclog_or_tty->print_cr("Setting bits from %d/%d.", + obj_card_num - _bottom_card_num, + obj_last_card_num - _bottom_card_num); + */ + for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) { + _card_bm->par_at_put(j - _bottom_card_num, 1); + } +#endif + } + // In any case, we set the last card num. + last_card_num = obj_last_card_num; + + marked_bytes += obj_sz * HeapWordSize; + // Find the next marked object after this one. + start = _bm->getNextMarkedWordAddress(start + 1, nextTop); + _changed = true; + } + // Handle the last range, if any. + if (start_card_num != -1) + mark_card_num_range(start_card_num, last_card_num); + if (_final) { + // Mark the allocated-since-marking portion... + HeapWord* tp = hr->top(); + if (nextTop < tp) { + start_card_num = + intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift); + last_card_num = + intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift); + mark_card_num_range(start_card_num, last_card_num); + // This definitely means the region has live objects. + _region_bm->par_at_put(hr->hrs_index(), 1); + } + } + + hr->add_to_marked_bytes(marked_bytes); + // Update the live region bitmap. 
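The card bookkeeping in the loop above is a shift plus an offset subtraction: card_num(addr) = addr >> card_shift, and the bit to set in _card_bm is card_num(addr) - card_num(heap_bottom). A tiny self-contained example of that mapping follows; the card shift, heap base and object size are illustrative only (the real values come from CardTableModRefBS and the reserved heap region).

#include <cstdint>
#include <cstdio>

// Worked example of the card-index arithmetic used by CalcLiveObjectsClosure:
//   card_num(addr)     = addr >> card_shift        (card 0 is address 0)
//   card bitmap index  = card_num(addr) - card_num(heap_bottom)
int main() {
  const int       card_shift  = 9;               // 512-byte cards, for the example
  const uintptr_t heap_bottom = 0x40000000u;     // made-up heap base

  const intptr_t bottom_card_num = (intptr_t)(heap_bottom >> card_shift);

  // An object starting 1000 bytes into the heap and occupying 2000 bytes:
  uintptr_t obj_start = heap_bottom + 1000;
  uintptr_t obj_last  = obj_start + 2000 - 1;    // last byte of the object

  intptr_t first_card = (intptr_t)(obj_start >> card_shift);
  intptr_t last_card  = (intptr_t)(obj_last  >> card_shift);

  // These are exactly the bits mark_card_num_range() would set in _card_bm.
  for (intptr_t c = first_card; c <= last_card; c++) {
    std::printf("set card bitmap index %ld\n", (long)(c - bottom_card_num));
  }
  return 0;
}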
+ if (marked_bytes > 0) { + _region_bm->par_at_put(hr->hrs_index(), 1); + } + hr->set_top_at_conc_mark_count(nextTop); + _tot_live += hr->next_live_bytes(); + _tot_used += hr->used(); + _words_done = words_done; + + if (!_final) { + ++_regions_done; + if (_regions_done % 10 == 0) { + double end_vtime_sec = os::elapsedVTime(); + double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec; + if (elapsed_vtime_sec > (10.0 / 1000.0)) { + jlong sleep_time_ms = + (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0); +#if 0 + gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, " + "overhead %1.4lf", + elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, + _co_tracker->concOverhead(os::elapsedTime())); +#endif + os::sleep(Thread::current(), sleep_time_ms, false); + _start_vtime_sec = end_vtime_sec; + } + } + } + + return false; + } + + bool changed() { return _changed; } + void reset() { _changed = false; _words_done = 0; } + void no_yield() { _yield = false; } + size_t words_done() { return _words_done; } + size_t tot_live() { return _tot_live; } + size_t tot_used() { return _tot_used; } +}; + + +void ConcurrentMark::calcDesiredRegions() { + guarantee( _cleanup_co_tracker.enabled(), "invariant" ); + _cleanup_co_tracker.start(); + + _region_bm.clear(); + _card_bm.clear(); + CalcLiveObjectsClosure calccl(false /*final*/, + nextMarkBitMap(), this, + &_region_bm, &_card_bm, + &_cleanup_co_tracker); + G1CollectedHeap *g1h = G1CollectedHeap::heap(); + g1h->heap_region_iterate(&calccl); + + do { + calccl.reset(); + g1h->heap_region_iterate(&calccl); + } while (calccl.changed()); + + _cleanup_co_tracker.update(true); +} + +class G1ParFinalCountTask: public AbstractGangTask { +protected: + G1CollectedHeap* _g1h; + CMBitMap* _bm; + size_t _n_workers; + size_t *_live_bytes; + size_t *_used_bytes; + BitMap* _region_bm; + BitMap* _card_bm; +public: + G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm, + BitMap* region_bm, BitMap* card_bm) : + AbstractGangTask("G1 final counting"), _g1h(g1h), + _bm(bm), _region_bm(region_bm), _card_bm(card_bm) + { + if (ParallelGCThreads > 0) + _n_workers = _g1h->workers()->total_workers(); + else + _n_workers = 1; + _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); + _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); + } + + ~G1ParFinalCountTask() { + FREE_C_HEAP_ARRAY(size_t, _live_bytes); + FREE_C_HEAP_ARRAY(size_t, _used_bytes); + } + + void work(int i) { + CalcLiveObjectsClosure calccl(true /*final*/, + _bm, _g1h->concurrent_mark(), + _region_bm, _card_bm, + NULL /* CO tracker */); + calccl.no_yield(); + if (ParallelGCThreads > 0) { + _g1h->heap_region_par_iterate_chunked(&calccl, i, 1); + } else { + _g1h->heap_region_iterate(&calccl); + } + assert(calccl.complete(), "Shouldn't have yielded!"); + + guarantee( (size_t)i < _n_workers, "invariant" ); + _live_bytes[i] = calccl.tot_live(); + _used_bytes[i] = calccl.tot_used(); + } + size_t live_bytes() { + size_t live_bytes = 0; + for (size_t i = 0; i < _n_workers; ++i) + live_bytes += _live_bytes[i]; + return live_bytes; + } + size_t used_bytes() { + size_t used_bytes = 0; + for (size_t i = 0; i < _n_workers; ++i) + used_bytes += _used_bytes[i]; + return used_bytes; + } +}; + +class G1ParNoteEndTask; + +class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { + G1CollectedHeap* _g1; + int _worker_num; + size_t _max_live_bytes; + size_t _regions_claimed; + size_t _freed_bytes; + size_t _cleared_h_regions; + size_t _freed_regions; + UncleanRegionList* _unclean_region_list; + double 
_claimed_region_time; + double _max_region_time; + +public: + G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, + UncleanRegionList* list, + int worker_num); + size_t freed_bytes() { return _freed_bytes; } + size_t cleared_h_regions() { return _cleared_h_regions; } + size_t freed_regions() { return _freed_regions; } + UncleanRegionList* unclean_region_list() { + return _unclean_region_list; + } + + bool doHeapRegion(HeapRegion *r); + + size_t max_live_bytes() { return _max_live_bytes; } + size_t regions_claimed() { return _regions_claimed; } + double claimed_region_time_sec() { return _claimed_region_time; } + double max_region_time_sec() { return _max_region_time; } +}; + +class G1ParNoteEndTask: public AbstractGangTask { + friend class G1NoteEndOfConcMarkClosure; +protected: + G1CollectedHeap* _g1h; + size_t _max_live_bytes; + size_t _freed_bytes; + ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state; +public: + G1ParNoteEndTask(G1CollectedHeap* g1h, + ConcurrentMark::ParCleanupThreadState** + par_cleanup_thread_state) : + AbstractGangTask("G1 note end"), _g1h(g1h), + _max_live_bytes(0), _freed_bytes(0), + _par_cleanup_thread_state(par_cleanup_thread_state) + {} + + void work(int i) { + double start = os::elapsedTime(); + G1NoteEndOfConcMarkClosure g1_note_end(_g1h, + &_par_cleanup_thread_state[i]->list, + i); + if (ParallelGCThreads > 0) { + _g1h->heap_region_par_iterate_chunked(&g1_note_end, i, 2); + } else { + _g1h->heap_region_iterate(&g1_note_end); + } + assert(g1_note_end.complete(), "Shouldn't have yielded!"); + + // Now finish up freeing the current thread's regions. + _g1h->finish_free_region_work(g1_note_end.freed_bytes(), + g1_note_end.cleared_h_regions(), + 0, NULL); + { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + _max_live_bytes += g1_note_end.max_live_bytes(); + _freed_bytes += g1_note_end.freed_bytes(); + } + double end = os::elapsedTime(); + if (G1PrintParCleanupStats) { + gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] " + "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n", + i, start, end, (end-start)*1000.0, + g1_note_end.regions_claimed(), + g1_note_end.claimed_region_time_sec()*1000.0, + g1_note_end.max_region_time_sec()*1000.0); + } + } + size_t max_live_bytes() { return _max_live_bytes; } + size_t freed_bytes() { return _freed_bytes; } +}; + +class G1ParScrubRemSetTask: public AbstractGangTask { +protected: + G1RemSet* _g1rs; + BitMap* _region_bm; + BitMap* _card_bm; +public: + G1ParScrubRemSetTask(G1CollectedHeap* g1h, + BitMap* region_bm, BitMap* card_bm) : + AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), + _region_bm(region_bm), _card_bm(card_bm) + {} + + void work(int i) { + if (ParallelGCThreads > 0) { + _g1rs->scrub_par(_region_bm, _card_bm, i, 3); + } else { + _g1rs->scrub(_region_bm, _card_bm); + } + } + +}; + +G1NoteEndOfConcMarkClosure:: +G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, + UncleanRegionList* list, + int worker_num) + : _g1(g1), _worker_num(worker_num), + _max_live_bytes(0), _regions_claimed(0), + _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0), + _claimed_region_time(0.0), _max_region_time(0.0), + _unclean_region_list(list) +{} + +bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) { + // We use a claim value of zero here because all regions + // were claimed with value 1 in the FinalCount task. 
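G1ParFinalCountTask, G1ParNoteEndTask and G1ParScrubRemSetTask above all walk the heap regions through heap_region_par_iterate_chunked(), apparently with a distinct claim value per phase (the trailing 1, 2 and 3 arguments), which is what the comment at the start of doHeapRegion() is alluding to. Below is a stripped-down sketch of claim-value based work claiming, with std::atomic standing in for HotSpot's primitives; Region and par_iterate are invented names, and the real iteration also hands regions out in chunks rather than one at a time.

#include <atomic>
#include <vector>

// Claim-value idea: each parallel phase picks a fresh value; a worker may
// process a region only if it is the one whose CAS moves the region's claim
// field to that value.  Using a new value per phase means nobody has to reset
// the fields in between.  Simplified illustration only.
struct Region {
  std::atomic<int> claim{0};
};

template <typename Fn>
void par_iterate(std::vector<Region>& regions, int new_claim, Fn process) {
  for (Region& r : regions) {
    int seen = r.claim.load(std::memory_order_relaxed);
    if (seen == new_claim) continue;                  // another worker owns it
    if (r.claim.compare_exchange_strong(seen, new_claim)) {
      process(r);                                     // this worker owns it now
    }
    // On CAS failure some other worker claimed the region first; move on.
  }
}

int main() {
  std::vector<Region> regions(8);
  int processed = 0;
  par_iterate(regions, /* new_claim = */ 1, [&](Region&) { ++processed; });
  return processed == 8 ? 0 : 1;                      // single-threaded demo
}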
+ r->reset_gc_time_stamp(); + if (!r->continuesHumongous()) { + double start = os::elapsedTime(); + _regions_claimed++; + r->note_end_of_marking(); + _max_live_bytes += r->max_live_bytes(); + _g1->free_region_if_totally_empty_work(r, + _freed_bytes, + _cleared_h_regions, + _freed_regions, + _unclean_region_list, + true /*par*/); + double region_time = (os::elapsedTime() - start); + _claimed_region_time += region_time; + if (region_time > _max_region_time) _max_region_time = region_time; + } + return false; +} + +void ConcurrentMark::cleanup() { + // world is stopped at this checkpoint + assert(SafepointSynchronize::is_at_safepoint(), + "world should be stopped"); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // If a full collection has happened, we shouldn't do this. + if (has_aborted()) { + g1h->set_marking_complete(); // So bitmap clearing isn't confused + return; + } + + _cleanup_co_tracker.disable(); + + G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); + g1p->record_concurrent_mark_cleanup_start(); + + double start = os::elapsedTime(); + GCOverheadReporter::recordSTWStart(start); + + // Do counting once more with the world stopped for good measure. + G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), + &_region_bm, &_card_bm); + if (ParallelGCThreads > 0) { + int n_workers = g1h->workers()->total_workers(); + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&g1_par_count_task); + g1h->set_par_threads(0); + } else { + g1_par_count_task.work(0); + } + + size_t known_garbage_bytes = + g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes(); +#if 0 + gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf", + (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024), + (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024), + (double) known_garbage_bytes / (double) (1024 * 1024)); +#endif // 0 + g1p->set_known_garbage_bytes(known_garbage_bytes); + + size_t start_used_bytes = g1h->used(); + _at_least_one_mark_complete = true; + g1h->set_marking_complete(); + + double count_end = os::elapsedTime(); + double this_final_counting_time = (count_end - start); + if (G1PrintParCleanupStats) { + gclog_or_tty->print_cr("Cleanup:"); + gclog_or_tty->print_cr(" Finalize counting: %8.3f ms", + this_final_counting_time*1000.0); + } + _total_counting_time += this_final_counting_time; + + // Install newly created mark bitMap as "prev". + swapMarkBitMaps(); + + g1h->reset_gc_time_stamp(); + + // Note end of marking in all heap regions. + double note_end_start = os::elapsedTime(); + G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state); + if (ParallelGCThreads > 0) { + int n_workers = g1h->workers()->total_workers(); + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&g1_par_note_end_task); + g1h->set_par_threads(0); + } else { + g1_par_note_end_task.work(0); + } + g1h->set_unclean_regions_coming(true); + double note_end_end = os::elapsedTime(); + // Tell the mutators that there might be unclean regions coming... + if (G1PrintParCleanupStats) { + gclog_or_tty->print_cr(" note end of marking: %8.3f ms.", + (note_end_end - note_end_start)*1000.0); + } + + // Now we "scrub" remembered sets. Note that we must do this before the + // call below, since it affects the metric by which we sort the heap + // regions. 
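One design point worth noting in the counting above: G1ParFinalCountTask gives every worker its own slot in _live_bytes/_used_bytes and only sums the slots after the parallel phase, so no synchronisation is needed while counting, and cleanup() then derives known_garbage_bytes as used minus live. A minimal sketch of that accumulate-then-reduce pattern; CountResults and the numbers are invented.

#include <cstddef>
#include <cstdio>
#include <vector>

// Per-worker accumulation: worker i writes only slot i during the parallel
// phase; totals are reduced single-threaded afterwards.  Illustration only.
struct CountResults {
  std::vector<size_t> live_bytes;
  std::vector<size_t> used_bytes;
  explicit CountResults(size_t n_workers)
      : live_bytes(n_workers, 0), used_bytes(n_workers, 0) {}

  void record(size_t worker, size_t live, size_t used) {
    live_bytes[worker] += live;       // no locking: the slot is private to the worker
    used_bytes[worker] += used;
  }

  size_t total_live() const {
    size_t sum = 0;
    for (size_t v : live_bytes) sum += v;
    return sum;
  }
  size_t total_used() const {
    size_t sum = 0;
    for (size_t v : used_bytes) sum += v;
    return sum;
  }
};

int main() {
  CountResults counts(2);
  counts.record(0, 100 * 1024, 160 * 1024);   // worker 0's regions
  counts.record(1, 300 * 1024, 400 * 1024);   // worker 1's regions
  std::printf("known garbage = %zu bytes\n",
              counts.total_used() - counts.total_live());
  return 0;
}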
+ if (G1ScrubRemSets) { + double rs_scrub_start = os::elapsedTime(); + G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); + if (ParallelGCThreads > 0) { + int n_workers = g1h->workers()->total_workers(); + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&g1_par_scrub_rs_task); + g1h->set_par_threads(0); + } else { + g1_par_scrub_rs_task.work(0); + } + + double rs_scrub_end = os::elapsedTime(); + double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); + _total_rs_scrub_time += this_rs_scrub_time; + } + + // this will also free any regions totally full of garbage objects, + // and sort the regions. + g1h->g1_policy()->record_concurrent_mark_cleanup_end( + g1_par_note_end_task.freed_bytes(), + g1_par_note_end_task.max_live_bytes()); + + // Statistics. + double end = os::elapsedTime(); + _cleanup_times.add((end - start) * 1000.0); + GCOverheadReporter::recordSTWEnd(end); + + // G1CollectedHeap::heap()->print(); + // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d", + // G1CollectedHeap::heap()->get_gc_time_stamp()); + + if (PrintGC || PrintGCDetails) { + g1h->print_size_transition(gclog_or_tty, + start_used_bytes, + g1h->used(), + g1h->capacity()); + } + + size_t cleaned_up_bytes = start_used_bytes - g1h->used(); + g1p->decrease_known_garbage_bytes(cleaned_up_bytes); + + // We need to make this be a "collection" so any collection pause that + // races with it goes around and waits for completeCleanup to finish. + g1h->increment_total_collections(); + +#ifndef PRODUCT + if (G1VerifyConcMark) { + G1CollectedHeap::heap()->prepare_for_verify(); + G1CollectedHeap::heap()->verify(true,false); + } +#endif +} + +void ConcurrentMark::completeCleanup() { + // A full collection intervened. + if (has_aborted()) return; + + int first = 0; + int last = (int)MAX2(ParallelGCThreads, (size_t)1); + for (int t = 0; t < last; t++) { + UncleanRegionList* list = &_par_cleanup_thread_state[t]->list; + assert(list->well_formed(), "Inv"); + HeapRegion* hd = list->hd(); + while (hd != NULL) { + // Now finish up the other stuff. + hd->rem_set()->clear(); + HeapRegion* next_hd = hd->next_from_unclean_list(); + (void)list->pop(); + guarantee(list->hd() == next_hd, "how not?"); + _g1h->put_region_on_unclean_list(hd); + if (!hd->isHumongous()) { + // Add this to the _free_regions count by 1. 
+ _g1h->finish_free_region_work(0, 0, 1, NULL); + } + hd = list->hd(); + guarantee(hd == next_hd, "how not?"); + } + } +} + + +class G1CMIsAliveClosure: public BoolObjectClosure { + G1CollectedHeap* _g1; + public: + G1CMIsAliveClosure(G1CollectedHeap* g1) : + _g1(g1) + {} + + void do_object(oop obj) { + assert(false, "not to be invoked"); + } + bool do_object_b(oop obj) { + HeapWord* addr = (HeapWord*)obj; + return addr != NULL && + (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); + } +}; + +class G1CMKeepAliveClosure: public OopClosure { + G1CollectedHeap* _g1; + ConcurrentMark* _cm; + CMBitMap* _bitMap; + public: + G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm, + CMBitMap* bitMap) : + _g1(g1), _cm(cm), + _bitMap(bitMap) {} + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + oop thisOop = *p; + HeapWord* addr = (HeapWord*)thisOop; + if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) { + _bitMap->mark(addr); + _cm->mark_stack_push(thisOop); + } + } +}; + +class G1CMDrainMarkingStackClosure: public VoidClosure { + CMMarkStack* _markStack; + CMBitMap* _bitMap; + G1CMKeepAliveClosure* _oopClosure; + public: + G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack, + G1CMKeepAliveClosure* oopClosure) : + _bitMap(bitMap), + _markStack(markStack), + _oopClosure(oopClosure) + {} + + void do_void() { + _markStack->drain((OopClosure*)_oopClosure, _bitMap, false); + } +}; + +void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { + ResourceMark rm; + HandleMark hm; + ReferencePolicy* soft_ref_policy; + + // Process weak references. + if (clear_all_soft_refs) { + soft_ref_policy = new AlwaysClearPolicy(); + } else { +#ifdef COMPILER2 + soft_ref_policy = new LRUMaxHeapPolicy(); +#else + soft_ref_policy = new LRUCurrentHeapPolicy(); +#endif + } + assert(_markStack.isEmpty(), "mark stack should be empty"); + + G1CollectedHeap* g1 = G1CollectedHeap::heap(); + G1CMIsAliveClosure g1IsAliveClosure(g1); + + G1CMKeepAliveClosure g1KeepAliveClosure(g1, this, nextMarkBitMap()); + G1CMDrainMarkingStackClosure + g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack, + &g1KeepAliveClosure); + + // XXXYYY Also: copy the parallel ref processing code from CMS. + ReferenceProcessor* rp = g1->ref_processor(); + rp->process_discovered_references(soft_ref_policy, + &g1IsAliveClosure, + &g1KeepAliveClosure, + &g1DrainMarkingStackClosure, + NULL); + assert(_markStack.overflow() || _markStack.isEmpty(), + "mark stack should be empty (unless it overflowed)"); + if (_markStack.overflow()) { + set_has_overflown(); + } + + rp->enqueue_discovered_references(); + rp->verify_no_references_recorded(); + assert(!rp->discovery_enabled(), "should have been disabled"); + + // Now clean up stale oops in SymbolTable and StringTable + SymbolTable::unlink(&g1IsAliveClosure); + StringTable::unlink(&g1IsAliveClosure); +} + +void ConcurrentMark::swapMarkBitMaps() { + CMBitMapRO* temp = _prevMarkBitMap; + _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; + _nextMarkBitMap = (CMBitMap*) temp; +} + +class CMRemarkTask: public AbstractGangTask { +private: + ConcurrentMark *_cm; + +public: + void work(int worker_i) { + // Since all available tasks are actually started, we should + // only proceed if we're supposed to be actived. 
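For weakRefsWork() above, the shape to keep in mind is that the ReferenceProcessor is driven by three callbacks: an is-alive predicate (G1CMIsAliveClosure, a liveness test against the heap), a keep-alive action (G1CMKeepAliveClosure, mark and push), and a drain action (G1CMDrainMarkingStackClosure) that finishes transitive marking. The sketch below only illustrates that division of labour; the loop, the types and the policy predicate are invented and much simpler than the real discovered-reference protocol.

#include <cstdio>
#include <vector>

// Conceptual sketch of the three roles handed to
// process_discovered_references(): is_alive answers whether a referent
// survived marking on its own, keep_alive marks a referent we decide to
// retain, drain finishes transitive marking from whatever keep_alive pushed.
// All of the code below is invented for the illustration.
struct Referent { bool marked = false; };
struct WeakRef  { Referent* referent; };

template <typename IsAlive, typename KeepAlive, typename Drain, typename Policy>
void process_discovered(std::vector<WeakRef>& refs,
                        IsAlive is_alive, KeepAlive keep_alive,
                        Drain drain, Policy should_keep) {
  for (WeakRef& ref : refs) {
    if (ref.referent == nullptr || is_alive(ref.referent)) {
      continue;                        // referent is reachable anyway
    }
    if (should_keep(ref)) {
      keep_alive(ref.referent);        // e.g. a soft reference the policy retains
    } else {
      ref.referent = nullptr;          // clear it; the Reference is enqueued later
    }
  }
  drain();                             // trace everything keep_alive made live
}

int main() {
  Referent live_one, kept_one, dead_one;
  live_one.marked = true;
  std::vector<WeakRef> refs = { {&live_one}, {&kept_one}, {&dead_one} };

  int kept = 0;
  process_discovered(
      refs,
      [](Referent* r)       { return r->marked; },            // bitmap stand-in
      [&](Referent* r)      { r->marked = true; ++kept; },     // mark + "push"
      []()                  { /* drain the mark stack here */ },
      [&](const WeakRef& r) { return r.referent == &kept_one; });

  std::printf("kept = %d, cleared = %d\n",
              kept, refs[2].referent == nullptr ? 1 : 0);
  return 0;
}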
+ if ((size_t)worker_i < _cm->active_tasks()) { + CMTask* task = _cm->task(worker_i); + task->record_start_time(); + do { + task->do_marking_step(1000000000.0 /* something very large */); + } while (task->has_aborted() && !_cm->has_overflown()); + // If we overflow, then we do not want to restart. We instead + // want to abort remark and do concurrent marking again. + task->record_end_time(); + } + } + + CMRemarkTask(ConcurrentMark* cm) : + AbstractGangTask("Par Remark"), _cm(cm) { } +}; + +void ConcurrentMark::checkpointRootsFinalWork() { + ResourceMark rm; + HandleMark hm; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + g1h->ensure_parsability(false); + + if (ParallelGCThreads > 0) { + g1h->change_strong_roots_parity(); + // this is remark, so we'll use up all available threads + int active_workers = ParallelGCThreads; + set_phase(active_workers, false); + + CMRemarkTask remarkTask(this); + // We will start all available threads, even if we decide that the + // active_workers will be fewer. The extra ones will just bail out + // immediately. + int n_workers = g1h->workers()->total_workers(); + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&remarkTask); + g1h->set_par_threads(0); + + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); + } else { + g1h->change_strong_roots_parity(); + // this is remark, so we'll use up all available threads + int active_workers = 1; + set_phase(active_workers, false); + + CMRemarkTask remarkTask(this); + // We will start all available threads, even if we decide that the + // active_workers will be fewer. The extra ones will just bail out + // immediately. + remarkTask.work(0); + + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); + } + + print_stats(); + + if (!restart_for_overflow()) + set_non_marking_state(); + +#if VERIFY_OBJS_PROCESSED + if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { + gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", + _scan_obj_cl.objs_processed, + ThreadLocalObjQueue::objs_enqueued); + guarantee(_scan_obj_cl.objs_processed == + ThreadLocalObjQueue::objs_enqueued, + "Different number of objs processed and enqueued."); + } +#endif +} + +class ReachablePrinterOopClosure: public OopClosure { +private: + G1CollectedHeap* _g1h; + CMBitMapRO* _bitmap; + outputStream* _out; + +public: + ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) : + _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { } + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + oop obj = *p; + const char* str = NULL; + const char* str2 = ""; + + if (!_g1h->is_in_g1_reserved(obj)) + str = "outside G1 reserved"; + else { + HeapRegion* hr = _g1h->heap_region_containing(obj); + guarantee( hr != NULL, "invariant" ); + if (hr->obj_allocated_since_prev_marking(obj)) { + str = "over TAMS"; + if (_bitmap->isMarked((HeapWord*) obj)) + str2 = " AND MARKED"; + } else if (_bitmap->isMarked((HeapWord*) obj)) + str = "marked"; + else + str = "#### NOT MARKED ####"; + } + + _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s", + p, (void*) obj, str, str2); + } +}; + +class ReachablePrinterClosure: public BitMapClosure { +private: + CMBitMapRO* _bitmap; + outputStream* _out; + +public: + ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) : + _bitmap(bitmap), _out(out) { } + + bool 
do_bit(size_t offset) { + HeapWord* addr = _bitmap->offsetToHeapWord(offset); + ReachablePrinterOopClosure oopCl(_bitmap, _out); + + _out->print_cr(" obj "PTR_FORMAT", offset %10d (marked)", addr, offset); + oop(addr)->oop_iterate(&oopCl); + _out->print_cr(""); + + return true; + } +}; + +class ObjInRegionReachablePrinterClosure : public ObjectClosure { +private: + CMBitMapRO* _bitmap; + outputStream* _out; + +public: + void do_object(oop o) { + ReachablePrinterOopClosure oopCl(_bitmap, _out); + + _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o); + o->oop_iterate(&oopCl); + _out->print_cr(""); + } + + ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) : + _bitmap(bitmap), _out(out) { } +}; + +class RegionReachablePrinterClosure : public HeapRegionClosure { +private: + CMBitMapRO* _bitmap; + outputStream* _out; + +public: + bool doHeapRegion(HeapRegion* hr) { + HeapWord* b = hr->bottom(); + HeapWord* e = hr->end(); + HeapWord* t = hr->top(); + HeapWord* p = hr->prev_top_at_mark_start(); + _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " + "PTAMS: "PTR_FORMAT, b, e, t, p); + _out->print_cr(""); + + ObjInRegionReachablePrinterClosure ocl(_bitmap, _out); + hr->object_iterate_mem_careful(MemRegion(p, t), &ocl); + + return false; + } + + RegionReachablePrinterClosure(CMBitMapRO* bitmap, + outputStream* out) : + _bitmap(bitmap), _out(out) { } +}; + +void ConcurrentMark::print_prev_bitmap_reachable() { + outputStream* out = gclog_or_tty; + +#if SEND_HEAP_DUMP_TO_FILE + guarantee(heap_dump_file == NULL, "Protocol"); + char fn_buf[100]; + sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id()); + heap_dump_file = fopen(fn_buf, "w"); + fileStream fstream(heap_dump_file); + out = &fstream; +#endif // SEND_HEAP_DUMP_TO_FILE + + RegionReachablePrinterClosure rcl(_prevMarkBitMap, out); + out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP"); + _g1h->heap_region_iterate(&rcl); + out->print_cr(""); + + ReachablePrinterClosure cl(_prevMarkBitMap, out); + out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP"); + _prevMarkBitMap->iterate(&cl); + out->print_cr(""); + +#if SEND_HEAP_DUMP_TO_FILE + fclose(heap_dump_file); + heap_dump_file = NULL; +#endif // SEND_HEAP_DUMP_TO_FILE +} + +// This note is for drainAllSATBBuffers and the code in between. +// In the future we could reuse a task to do this work during an +// evacuation pause (since now tasks are not active and can be claimed +// during an evacuation pause). This was a late change to the code and +// is currently not being taken advantage of. + +class CMGlobalObjectClosure : public ObjectClosure { +private: + ConcurrentMark* _cm; + +public: + void do_object(oop obj) { + _cm->deal_with_reference(obj); + } + + CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { } +}; + +void ConcurrentMark::deal_with_reference(oop obj) { + if (verbose_high()) + gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT, + (void*) obj); + + + HeapWord* objAddr = (HeapWord*) obj; + if (_g1h->is_in_g1_reserved(objAddr)) { + tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" ); + HeapRegion* hr = _g1h->heap_region_containing(obj); + if (_g1h->is_obj_ill(obj, hr)) { + if (verbose_high()) + gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered " + "marked", (void*) obj); + + // we need to mark it first + if (_nextMarkBitMap->parMark(objAddr)) { + // No OrderAccess:store_load() is needed. 
It is implicit in the + // CAS done in parMark(objAddr) above + HeapWord* finger = _finger; + if (objAddr < finger) { + if (verbose_high()) + gclog_or_tty->print_cr("[global] below the global finger " + "("PTR_FORMAT"), pushing it", finger); + if (!mark_stack_push(obj)) { + if (verbose_low()) + gclog_or_tty->print_cr("[global] global stack overflow during " + "deal_with_reference"); + } + } + } + } + } +} + +void ConcurrentMark::drainAllSATBBuffers() { + CMGlobalObjectClosure oc(this); + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + satb_mq_set.set_closure(&oc); + + while (satb_mq_set.apply_closure_to_completed_buffer()) { + if (verbose_medium()) + gclog_or_tty->print_cr("[global] processed an SATB buffer"); + } + + // no need to check whether we should do this, as this is only + // called during an evacuation pause + satb_mq_set.iterate_closure_all_threads(); + + satb_mq_set.set_closure(NULL); + guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); +} + +void ConcurrentMark::markPrev(oop p) { + // Note we are overriding the read-only view of the prev map here, via + // the cast. + ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p); +} + +void ConcurrentMark::clear(oop p) { + assert(p != NULL && p->is_oop(), "expected an oop"); + HeapWord* addr = (HeapWord*)p; + assert(addr >= _nextMarkBitMap->startWord() || + addr < _nextMarkBitMap->endWord(), "in a region"); + + _nextMarkBitMap->clear(addr); +} + +void ConcurrentMark::clearRangeBothMaps(MemRegion mr) { + // Note we are overriding the read-only view of the prev map here, via + // the cast. + ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); + _nextMarkBitMap->clearRange(mr); +} + +HeapRegion* +ConcurrentMark::claim_region(int task_num) { + // "checkpoint" the finger + HeapWord* finger = _finger; + + // _heap_end will not change underneath our feet; it only changes at + // yield points. + while (finger < _heap_end) { + tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" ); + + // is the gap between reading the finger and doing the CAS too long? + + HeapRegion* curr_region = _g1h->heap_region_containing(finger); + HeapWord* bottom = curr_region->bottom(); + HeapWord* end = curr_region->end(); + HeapWord* limit = curr_region->next_top_at_mark_start(); + + if (verbose_low()) + gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" " + "["PTR_FORMAT", "PTR_FORMAT"), " + "limit = "PTR_FORMAT, + task_num, curr_region, bottom, end, limit); + + HeapWord* res = + (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); + if (res == finger) { + // we succeeded + + // notice that _finger == end cannot be guaranteed here since, + // someone else might have moved the finger even further + guarantee( _finger >= end, "the finger should have moved forward" ); + + if (verbose_low()) + gclog_or_tty->print_cr("[%d] we were successful with region = " + PTR_FORMAT, task_num, curr_region); + + if (limit > bottom) { + if (verbose_low()) + gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, " + "returning it ", task_num, curr_region); + return curr_region; + } else { + tmp_guarantee_CM( limit == bottom, + "the region limit should be at bottom" ); + if (verbose_low()) + gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, " + "returning NULL", task_num, curr_region); + // we return NULL and the caller should try calling + // claim_region() again. 
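claim_region() above parcels out work by CAS-ing a single global finger from the start of a region to its end; the loser of the race simply re-reads the finger and retries. Below is a compact sketch of that claiming idea over a flat heap of equally sized regions, with std::atomic standing in for Atomic::cmpxchg_ptr; RegionClaimer and the sizes are invented, and the real code additionally re-derives the region via heap_region_containing(finger) and skips empty regions.

#include <atomic>
#include <cstddef>
#include <utility>

// Finger-based claiming: tasks race to advance a shared pointer by one region;
// whichever CAS succeeds owns the region it stepped over.  Illustration only.
struct RegionClaimer {
  char*              heap_end;
  size_t             region_bytes;
  std::atomic<char*> finger;

  RegionClaimer(char* start, char* end, size_t bytes)
      : heap_end(end), region_bytes(bytes), finger(start) {}

  // Returns [start, end) of the claimed region, or {nullptr, nullptr} when the
  // finger has reached the end of the heap.
  std::pair<char*, char*> claim() {
    char* f = finger.load();
    while (f < heap_end) {
      char* region_end = f + region_bytes;
      if (finger.compare_exchange_strong(f, region_end)) {
        return { f, region_end };        // we won the race for this region
      }
      // compare_exchange_strong reloaded f with the current finger; another
      // task moved it, so retry from the new position.
    }
    return { nullptr, nullptr };
  }
};

int main() {
  static char heap[4096];
  RegionClaimer claimer(heap, heap + sizeof(heap), 1024);
  int claimed = 0;
  while (claimer.claim().first != nullptr) ++claimed;
  return claimed == 4 ? 0 : 1;
}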
+ return NULL; + } + } else { + guarantee( _finger > finger, "the finger should have moved forward" ); + if (verbose_low()) + gclog_or_tty->print_cr("[%d] somebody else moved the finger, " + "global finger = "PTR_FORMAT", " + "our finger = "PTR_FORMAT, + task_num, _finger, finger); + + // read it again + finger = _finger; + } + } + + return NULL; +} + +void ConcurrentMark::oops_do(OopClosure* cl) { + if (_markStack.size() > 0 && verbose_low()) + gclog_or_tty->print_cr("[global] scanning the global marking stack, " + "size = %d", _markStack.size()); + // we first iterate over the contents of the mark stack... + _markStack.oops_do(cl); + + for (int i = 0; i < (int)_max_task_num; ++i) { + OopTaskQueue* queue = _task_queues->queue((int)i); + + if (queue->size() > 0 && verbose_low()) + gclog_or_tty->print_cr("[global] scanning task queue of task %d, " + "size = %d", i, queue->size()); + + // ...then over the contents of the all the task queues. + queue->oops_do(cl); + } + + // finally, invalidate any entries that in the region stack that + // point into the collection set + if (_regionStack.invalidate_entries_into_cset()) { + // otherwise, any gray objects copied during the evacuation pause + // might not be visited. + guarantee( _should_gray_objects, "invariant" ); + } +} + +void ConcurrentMark::clear_marking_state() { + _markStack.setEmpty(); + _markStack.clear_overflow(); + _regionStack.setEmpty(); + _regionStack.clear_overflow(); + clear_has_overflown(); + _finger = _heap_start; + + for (int i = 0; i < (int)_max_task_num; ++i) { + OopTaskQueue* queue = _task_queues->queue(i); + queue->set_empty(); + } +} + +void ConcurrentMark::print_stats() { + if (verbose_stats()) { + gclog_or_tty->print_cr("---------------------------------------------------------------------"); + for (size_t i = 0; i < _active_tasks; ++i) { + _tasks[i]->print_stats(); + gclog_or_tty->print_cr("---------------------------------------------------------------------"); + } + } +} + +class CSMarkOopClosure: public OopClosure { + friend class CSMarkBitMapClosure; + + G1CollectedHeap* _g1h; + CMBitMap* _bm; + ConcurrentMark* _cm; + oop* _ms; + jint* _array_ind_stack; + int _ms_size; + int _ms_ind; + int _array_increment; + + bool push(oop obj, int arr_ind = 0) { + if (_ms_ind == _ms_size) { + gclog_or_tty->print_cr("Mark stack is full."); + return false; + } + _ms[_ms_ind] = obj; + if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind; + _ms_ind++; + return true; + } + + oop pop() { + if (_ms_ind == 0) return NULL; + else { + _ms_ind--; + return _ms[_ms_ind]; + } + } + + bool drain() { + while (_ms_ind > 0) { + oop obj = pop(); + assert(obj != NULL, "Since index was non-zero."); + if (obj->is_objArray()) { + jint arr_ind = _array_ind_stack[_ms_ind]; + objArrayOop aobj = objArrayOop(obj); + jint len = aobj->length(); + jint next_arr_ind = arr_ind + _array_increment; + if (next_arr_ind < len) { + push(obj, next_arr_ind); + } + // Now process this portion of this one. 
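The objArray handling in drain() above deliberately scans big arrays a stride at a time, re-pushing (array, next_index) before working on the current chunk, so the closure never has to walk an entire large array in one uninterrupted step. A small sketch of that chunking pattern over a plain vector-based stack; ArrayTask, the stride and the element visit are all invented stand-ins.

#include <algorithm>
#include <cstdio>
#include <vector>

// Chunked array scanning: pop a task, re-push a continuation for the rest of
// the array, then visit only the current stride.  Mirrors the _array_increment
// logic in CSMarkOopClosure::drain(); simplified for illustration.
struct ArrayTask {
  const std::vector<int>* array;
  int                     start_index;
};

void drain_chunked(std::vector<ArrayTask>& stack, int stride) {
  while (!stack.empty()) {
    ArrayTask task = stack.back();
    stack.pop_back();

    int len  = (int) task.array->size();
    int next = task.start_index + stride;
    if (next < len) {
      stack.push_back({ task.array, next });   // continuation for the remainder
    }
    int lim = std::min(next, len);
    for (int j = task.start_index; j < lim; j++) {
      std::printf("visit [%d] = %d\n", j, (*task.array)[j]);  // stand-in for do_oop()
    }
  }
}

int main() {
  std::vector<int> big(10);
  for (int i = 0; i < (int) big.size(); i++) big[i] = i * i;

  std::vector<ArrayTask> stack;
  stack.push_back({ &big, 0 });
  drain_chunked(stack, /* stride = */ 4);      // visits elements in chunks of 4
  return 0;
}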
+ int lim = MIN2(next_arr_ind, len); + assert(!UseCompressedOops, "This needs to be fixed"); + for (int j = arr_ind; j < lim; j++) { + do_oop(aobj->obj_at_addr(j)); + } + + } else { + obj->oop_iterate(this); + } + if (abort()) return false; + } + return true; + } + +public: + CSMarkOopClosure(ConcurrentMark* cm, int ms_size) : + _g1h(G1CollectedHeap::heap()), + _cm(cm), + _bm(cm->nextMarkBitMap()), + _ms_size(ms_size), _ms_ind(0), + _ms(NEW_C_HEAP_ARRAY(oop, ms_size)), + _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)), + _array_increment(MAX2(ms_size/8, 16)) + {} + + ~CSMarkOopClosure() { + FREE_C_HEAP_ARRAY(oop, _ms); + FREE_C_HEAP_ARRAY(jint, _array_ind_stack); + } + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + oop obj = *p; + if (obj == NULL) return; + if (obj->is_forwarded()) { + // If the object has already been forwarded, we have to make sure + // that it's marked. So follow the forwarding pointer. Note that + // this does the right thing for self-forwarding pointers in the + // evacuation failure case. + obj = obj->forwardee(); + } + HeapRegion* hr = _g1h->heap_region_containing(obj); + if (hr != NULL) { + if (hr->in_collection_set()) { + if (_g1h->is_obj_ill(obj)) { + _bm->mark((HeapWord*)obj); + if (!push(obj)) { + gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed."); + set_abort(); + } + } + } else { + // Outside the collection set; we need to gray it + _cm->deal_with_reference(obj); + } + } + } +}; + +class CSMarkBitMapClosure: public BitMapClosure { + G1CollectedHeap* _g1h; + CMBitMap* _bitMap; + ConcurrentMark* _cm; + CSMarkOopClosure _oop_cl; +public: + CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) : + _g1h(G1CollectedHeap::heap()), + _bitMap(cm->nextMarkBitMap()), + _oop_cl(cm, ms_size) + {} + + ~CSMarkBitMapClosure() {} + + bool do_bit(size_t offset) { + // convert offset into a HeapWord* + HeapWord* addr = _bitMap->offsetToHeapWord(offset); + assert(_bitMap->endWord() && addr < _bitMap->endWord(), + "address out of range"); + assert(_bitMap->isMarked(addr), "tautology"); + oop obj = oop(addr); + if (!obj->is_forwarded()) { + if (!_oop_cl.push(obj)) return false; + if (!_oop_cl.drain()) return false; + } + // Otherwise... 
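+    // ...the object has already been successfully evacuated (regions that
+    // failed evacuation are skipped by the caller), so its forwarded copy,
+    // not this old image, is the one that matters and nothing needs doing here.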
+ return true; + } +}; + + +class CompleteMarkingInCSHRClosure: public HeapRegionClosure { + CMBitMap* _bm; + CSMarkBitMapClosure _bit_cl; + enum SomePrivateConstants { + MSSize = 1000 + }; + bool _completed; +public: + CompleteMarkingInCSHRClosure(ConcurrentMark* cm) : + _bm(cm->nextMarkBitMap()), + _bit_cl(cm, MSSize), + _completed(true) + {} + + ~CompleteMarkingInCSHRClosure() {} + + bool doHeapRegion(HeapRegion* r) { + if (!r->evacuation_failed()) { + MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start()); + if (!mr.is_empty()) { + if (!_bm->iterate(&_bit_cl, mr)) { + _completed = false; + return true; + } + } + } + return false; + } + + bool completed() { return _completed; } +}; + +class ClearMarksInHRClosure: public HeapRegionClosure { + CMBitMap* _bm; +public: + ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { } + + bool doHeapRegion(HeapRegion* r) { + if (!r->used_region().is_empty() && !r->evacuation_failed()) { + MemRegion usedMR = r->used_region(); + _bm->clearRange(r->used_region()); + } + return false; + } +}; + +void ConcurrentMark::complete_marking_in_collection_set() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + if (!g1h->mark_in_progress()) { + g1h->g1_policy()->record_mark_closure_time(0.0); + return; + } + + int i = 1; + double start = os::elapsedTime(); + while (true) { + i++; + CompleteMarkingInCSHRClosure cmplt(this); + g1h->collection_set_iterate(&cmplt); + if (cmplt.completed()) break; + } + double end_time = os::elapsedTime(); + double elapsed_time_ms = (end_time - start) * 1000.0; + g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms); + if (PrintGCDetails) { + gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms); + } + + ClearMarksInHRClosure clr(nextMarkBitMap()); + g1h->collection_set_iterate(&clr); +} + +// The next two methods deal with the following optimisation. Some +// objects are gray by being marked and located above the finger. If +// they are copied, during an evacuation pause, below the finger then +// the need to be pushed on the stack. The observation is that, if +// there are no regions in the collection set located above the +// finger, then the above cannot happen, hence we do not need to +// explicitly gray any objects when copying them to below the +// finger. The global stack will be scanned to ensure that, if it +// points to objects being copied, it will update their +// location. There is a tricky situation with the gray objects in +// region stack that are being coped, however. See the comment in +// newCSet(). + +void ConcurrentMark::newCSet() { + if (!concurrent_marking_in_progress()) + // nothing to do if marking is not in progress + return; + + // find what the lowest finger is among the global and local fingers + _min_finger = _finger; + for (int i = 0; i < (int)_max_task_num; ++i) { + CMTask* task = _tasks[i]; + HeapWord* task_finger = task->finger(); + if (task_finger != NULL && task_finger < _min_finger) + _min_finger = task_finger; + } + + _should_gray_objects = false; + + // This fixes a very subtle and fustrating bug. It might be the case + // that, during en evacuation pause, heap regions that contain + // objects that are gray (by being in regions contained in the + // region stack) are included in the collection set. 
Since such gray + // objects will be moved, and because it's not easy to redirect + // region stack entries to point to a new location (because objects + // in one region might be scattered to multiple regions after they + // are copied), one option is to ensure that all marked objects + // copied during a pause are pushed on the stack. Notice, however, + // that this problem can only happen when the region stack is not + // empty during an evacuation pause. So, we make the fix a bit less + // conservative and ensure that regions are pushed on the stack, + // irrespective whether all collection set regions are below the + // finger, if the region stack is not empty. This is expected to be + // a rare case, so I don't think it's necessary to be smarted about it. + if (!region_stack_empty()) + _should_gray_objects = true; +} + +void ConcurrentMark::registerCSetRegion(HeapRegion* hr) { + if (!concurrent_marking_in_progress()) + return; + + HeapWord* region_end = hr->end(); + if (region_end > _min_finger) + _should_gray_objects = true; +} + +void ConcurrentMark::disable_co_trackers() { + if (has_aborted()) { + if (_cleanup_co_tracker.enabled()) + _cleanup_co_tracker.disable(); + for (int i = 0; i < (int)_max_task_num; ++i) { + CMTask* task = _tasks[i]; + if (task->co_tracker_enabled()) + task->disable_co_tracker(); + } + } else { + guarantee( !_cleanup_co_tracker.enabled(), "invariant" ); + for (int i = 0; i < (int)_max_task_num; ++i) { + CMTask* task = _tasks[i]; + guarantee( !task->co_tracker_enabled(), "invariant" ); + } + } +} + +// abandon current marking iteration due to a Full GC +void ConcurrentMark::abort() { + // If we're not marking, nothing to do. + if (!G1ConcMark) return; + + // Clear all marks to force marking thread to do nothing + _nextMarkBitMap->clearAll(); + // Empty mark stack + clear_marking_state(); + for (int i = 0; i < (int)_max_task_num; ++i) + _tasks[i]->clear_region_fields(); + _has_aborted = true; + + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + satb_mq_set.abandon_partial_marking(); + satb_mq_set.set_active_all_threads(false); +} + +static void print_ms_time_info(const char* prefix, const char* name, + NumberSeq& ns) { + gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", + prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); + if (ns.num() > 0) { + gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", + prefix, ns.sd(), ns.maximum()); + } +} + +void ConcurrentMark::print_summary_info() { + gclog_or_tty->print_cr(" Concurrent marking:"); + print_ms_time_info(" ", "init marks", _init_times); + print_ms_time_info(" ", "remarks", _remark_times); + { + print_ms_time_info(" ", "final marks", _remark_mark_times); + print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); + + } + print_ms_time_info(" ", "cleanups", _cleanup_times); + gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", + _total_counting_time, + (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / + (double)_cleanup_times.num() + : 0.0)); + if (G1ScrubRemSets) { + gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", + _total_rs_scrub_time, + (_cleanup_times.num() > 0 ? 
_total_rs_scrub_time * 1000.0 / + (double)_cleanup_times.num() + : 0.0)); + } + gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", + (_init_times.sum() + _remark_times.sum() + + _cleanup_times.sum())/1000.0); + gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " + "(%8.2f s marking, %8.2f s counting).", + cmThread()->vtime_accum(), + cmThread()->vtime_mark_accum(), + cmThread()->vtime_count_accum()); +} + +// Closures +// XXX: there seems to be a lot of code duplication here; +// should refactor and consolidate the shared code. + +// This closure is used to mark refs into the CMS generation in +// the CMS bit map. Called at the first checkpoint. + +// We take a break if someone is trying to stop the world. +bool ConcurrentMark::do_yield_check(int worker_i) { + if (should_yield()) { + if (worker_i == 0) + _g1h->g1_policy()->record_concurrent_pause(); + cmThread()->yield(); + if (worker_i == 0) + _g1h->g1_policy()->record_concurrent_pause_end(); + return true; + } else { + return false; + } +} + +bool ConcurrentMark::should_yield() { + return cmThread()->should_yield(); +} + +bool ConcurrentMark::containing_card_is_marked(void* p) { + size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); + return _card_bm.at(offset >> CardTableModRefBS::card_shift); +} + +bool ConcurrentMark::containing_cards_are_marked(void* start, + void* last) { + return + containing_card_is_marked(start) && + containing_card_is_marked(last); +} + +#ifndef PRODUCT +// for debugging purposes +void ConcurrentMark::print_finger() { + gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, + _heap_start, _heap_end, _finger); + for (int i = 0; i < (int) _max_task_num; ++i) { + gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); + } + gclog_or_tty->print_cr(""); +} +#endif + +// Closure for iteration over bitmaps +class CMBitMapClosure : public BitMapClosure { +private: + // the bitmap that is being iterated over + CMBitMap* _nextMarkBitMap; + ConcurrentMark* _cm; + CMTask* _task; + // true if we're scanning a heap region claimed by the task (so that + // we move the finger along), false if we're not, i.e. currently when + // scanning a heap region popped from the region stack (so that we + // do not move the task finger along; it'd be a mistake if we did so). + bool _scanning_heap_region; + +public: + CMBitMapClosure(CMTask *task, + ConcurrentMark* cm, + CMBitMap* nextMarkBitMap) + : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } + + void set_scanning_heap_region(bool scanning_heap_region) { + _scanning_heap_region = scanning_heap_region; + } + + bool do_bit(size_t offset) { + HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); + tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" ); + tmp_guarantee_CM( addr < _cm->finger(), "invariant" ); + + if (_scanning_heap_region) { + statsOnly( _task->increase_objs_found_on_bitmap() ); + tmp_guarantee_CM( addr >= _task->finger(), "invariant" ); + // We move that task's local finger along. + _task->move_finger_to(addr); + } else { + // We move the task's region finger along. 
+ _task->move_region_finger_to(addr); + } + + _task->scan_object(oop(addr)); + // we only partially drain the local queue and global stack + _task->drain_local_queue(true); + _task->drain_global_stack(true); + + // if the has_aborted flag has been raised, we need to bail out of + // the iteration + return !_task->has_aborted(); + } +}; + +// Closure for iterating over objects, currently only used for +// processing SATB buffers. +class CMObjectClosure : public ObjectClosure { +private: + CMTask* _task; + +public: + void do_object(oop obj) { + _task->deal_with_reference(obj); + } + + CMObjectClosure(CMTask* task) : _task(task) { } +}; + +// Closure for iterating over object fields +class CMOopClosure : public OopClosure { +private: + G1CollectedHeap* _g1h; + ConcurrentMark* _cm; + CMTask* _task; + +public: + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" ); + + oop obj = *p; + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] we're looking at location " + "*"PTR_FORMAT" = "PTR_FORMAT, + _task->task_id(), p, (void*) obj); + _task->deal_with_reference(obj); + } + + CMOopClosure(G1CollectedHeap* g1h, + ConcurrentMark* cm, + CMTask* task) + : _g1h(g1h), _cm(cm), _task(task) { } +}; + +void CMTask::setup_for_region(HeapRegion* hr) { + tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(), + "claim_region() should have filtered out continues humongous regions" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT, + _task_id, hr); + + _curr_region = hr; + _finger = hr->bottom(); + update_region_limit(); +} + +void CMTask::update_region_limit() { + HeapRegion* hr = _curr_region; + HeapWord* bottom = hr->bottom(); + HeapWord* limit = hr->next_top_at_mark_start(); + + if (limit == bottom) { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] found an empty region " + "["PTR_FORMAT", "PTR_FORMAT")", + _task_id, bottom, limit); + // The region was collected underneath our feet. + // We set the finger to bottom to ensure that the bitmap + // iteration that will follow this will not do anything. + // (this is not a condition that holds when we set the region up, + // as the region is not supposed to be empty in the first place) + _finger = bottom; + } else if (limit >= _region_limit) { + tmp_guarantee_CM( limit >= _finger, "peace of mind" ); + } else { + tmp_guarantee_CM( limit < _region_limit, "only way to get here" ); + // This can happen under some pretty unusual circumstances. An + // evacuation pause empties the region underneath our feet (NTAMS + // at bottom). We then do some allocation in the region (NTAMS + // stays at bottom), followed by the region being used as a GC + // alloc region (NTAMS will move to top() and the objects + // originally below it will be grayed). All objects now marked in + // the region are explicitly grayed, if below the global finger, + // and we do not need in fact to scan anything else. So, we simply + // set _finger to be limit to ensure that the bitmap iteration + // doesn't do anything. 
+ _finger = limit; + } + + _region_limit = limit; +} + +void CMTask::giveup_current_region() { + tmp_guarantee_CM( _curr_region != NULL, "invariant" ); + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT, + _task_id, _curr_region); + clear_region_fields(); +} + +void CMTask::clear_region_fields() { + // Values for these three fields that indicate that we're not + // holding on to a region. + _curr_region = NULL; + _finger = NULL; + _region_limit = NULL; + + _region_finger = NULL; +} + +void CMTask::reset(CMBitMap* nextMarkBitMap) { + guarantee( nextMarkBitMap != NULL, "invariant" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] resetting", _task_id); + + _nextMarkBitMap = nextMarkBitMap; + clear_region_fields(); + + _calls = 0; + _elapsed_time_ms = 0.0; + _termination_time_ms = 0.0; + _termination_start_time_ms = 0.0; + +#if _MARKING_STATS_ + _local_pushes = 0; + _local_pops = 0; + _local_max_size = 0; + _objs_scanned = 0; + _global_pushes = 0; + _global_pops = 0; + _global_max_size = 0; + _global_transfers_to = 0; + _global_transfers_from = 0; + _region_stack_pops = 0; + _regions_claimed = 0; + _objs_found_on_bitmap = 0; + _satb_buffers_processed = 0; + _steal_attempts = 0; + _steals = 0; + _aborted = 0; + _aborted_overflow = 0; + _aborted_cm_aborted = 0; + _aborted_yield = 0; + _aborted_timed_out = 0; + _aborted_satb = 0; + _aborted_termination = 0; +#endif // _MARKING_STATS_ +} + +bool CMTask::should_exit_termination() { + regular_clock_call(); + // This is called when we are in the termination protocol. We should + // quit if, for some reason, this task wants to abort or the global + // stack is not empty (this means that we can get work from it). + return !_cm->mark_stack_empty() || has_aborted(); +} + +// This determines whether the method below will check both the local +// and global fingers when determining whether to push on the stack a +// gray object (value 1) or whether it will only check the global one +// (value 0). The tradeoffs are that the former will be a bit more +// accurate and possibly push less on the stack, but it might also be +// a little bit slower. + +#define _CHECK_BOTH_FINGERS_ 1 + +void CMTask::deal_with_reference(oop obj) { + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT, + _task_id, (void*) obj); + + ++_refs_reached; + + HeapWord* objAddr = (HeapWord*) obj; + if (_g1h->is_in_g1_reserved(objAddr)) { + tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" ); + HeapRegion* hr = _g1h->heap_region_containing(obj); + if (_g1h->is_obj_ill(obj, hr)) { + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked", + _task_id, (void*) obj); + + // we need to mark it first + if (_nextMarkBitMap->parMark(objAddr)) { + // No OrderAccess:store_load() is needed. It is implicit in the + // CAS done in parMark(objAddr) above + HeapWord* global_finger = _cm->finger(); + +#if _CHECK_BOTH_FINGERS_ + // we will check both the local and global fingers + + if (_finger != NULL && objAddr < _finger) { + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), " + "pushing it", _task_id, _finger); + push(obj); + } else if (_curr_region != NULL && objAddr < _region_limit) { + // do nothing + } else if (objAddr < global_finger) { + // Notice that the global finger might be moving forward + // concurrently. This is not a problem. 
In the worst case, we + // mark the object while it is above the global finger and, by + // the time we read the global finger, it has moved forward + // passed this object. In this case, the object will probably + // be visited when a task is scanning the region and will also + // be pushed on the stack. So, some duplicate work, but no + // correctness problems. + + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] below the global finger " + "("PTR_FORMAT"), pushing it", + _task_id, global_finger); + push(obj); + } else { + // do nothing + } +#else // _CHECK_BOTH_FINGERS_ + // we will only check the global finger + + if (objAddr < global_finger) { + // see long comment above + + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] below the global finger " + "("PTR_FORMAT"), pushing it", + _task_id, global_finger); + push(obj); + } +#endif // _CHECK_BOTH_FINGERS_ + } + } + } +} + +void CMTask::push(oop obj) { + HeapWord* objAddr = (HeapWord*) obj; + tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" ); + tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" ); + tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" ); + + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj); + + if (!_task_queue->push(obj)) { + // The local task queue looks full. We need to push some entries + // to the global stack. + + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] task queue overflow, " + "moving entries to the global stack", + _task_id); + move_entries_to_global_stack(); + + // this should succeed since, even if we overflow the global + // stack, we should have definitely removed some entries from the + // local queue. So, there must be space on it. + bool success = _task_queue->push(obj); + tmp_guarantee_CM( success, "invariant" ); + } + + statsOnly( int tmp_size = _task_queue->size(); + if (tmp_size > _local_max_size) + _local_max_size = tmp_size; + ++_local_pushes ); +} + +void CMTask::reached_limit() { + tmp_guarantee_CM( _words_scanned >= _words_scanned_limit || + _refs_reached >= _refs_reached_limit , + "shouldn't have been called otherwise" ); + regular_clock_call(); +} + +void CMTask::regular_clock_call() { + if (has_aborted()) + return; + + // First, we need to recalculate the words scanned and refs reached + // limits for the next clock call. + recalculate_limits(); + + // During the regular clock call we do the following + + // (1) If an overflow has been flagged, then we abort. + if (_cm->has_overflown()) { + set_has_aborted(); + return; + } + + // If we are not concurrent (i.e. we're doing remark) we don't need + // to check anything else. The other steps are only needed during + // the concurrent marking phase. + if (!concurrent()) + return; + + // (2) If marking has been aborted for Full GC, then we also abort. + if (_cm->has_aborted()) { + set_has_aborted(); + statsOnly( ++_aborted_cm_aborted ); + return; + } + + double curr_time_ms = os::elapsedVTime() * 1000.0; + + // (3) If marking stats are enabled, then we update the step history. 
+#if _MARKING_STATS_ + if (_words_scanned >= _words_scanned_limit) + ++_clock_due_to_scanning; + if (_refs_reached >= _refs_reached_limit) + ++_clock_due_to_marking; + + double last_interval_ms = curr_time_ms - _interval_start_time_ms; + _interval_start_time_ms = curr_time_ms; + _all_clock_intervals_ms.add(last_interval_ms); + + if (_cm->verbose_medium()) { + gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, " + "scanned = %d%s, refs reached = %d%s", + _task_id, last_interval_ms, + _words_scanned, + (_words_scanned >= _words_scanned_limit) ? " (*)" : "", + _refs_reached, + (_refs_reached >= _refs_reached_limit) ? " (*)" : ""); + } +#endif // _MARKING_STATS_ + + // (4) We check whether we should yield. If we have to, then we abort. + if (_cm->should_yield()) { + // We should yield. To do this we abort the task. The caller is + // responsible for yielding. + set_has_aborted(); + statsOnly( ++_aborted_yield ); + return; + } + + // (5) We check whether we've reached our time quota. If we have, + // then we abort. + double elapsed_time_ms = curr_time_ms - _start_time_ms; + if (elapsed_time_ms > _time_target_ms) { + set_has_aborted(); + _has_aborted_timed_out = true; + statsOnly( ++_aborted_timed_out ); + return; + } + + // (6) Finally, we check whether there are enough completed STAB + // buffers available for processing. If there are, we abort. + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers", + _task_id); + // we do need to process SATB buffers, we'll abort and restart + // the marking task to do so + set_has_aborted(); + statsOnly( ++_aborted_satb ); + return; + } +} + +void CMTask::recalculate_limits() { + _real_words_scanned_limit = _words_scanned + words_scanned_period; + _words_scanned_limit = _real_words_scanned_limit; + + _real_refs_reached_limit = _refs_reached + refs_reached_period; + _refs_reached_limit = _real_refs_reached_limit; +} + +void CMTask::decrease_limits() { + // This is called when we believe that we're going to do an infrequent + // operation which will increase the per byte scanned cost (i.e. move + // entries to/from the global stack). It basically tries to decrease the + // scanning limit so that the clock is called earlier. 
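+  // After this adjustment at most words_scanned_period / 4 words (and,
+  // similarly, at most refs_reached_period / 4 references) can be processed
+  // before reached_limit() triggers the next regular_clock_call().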
+ + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] decreasing limits", _task_id); + + _words_scanned_limit = _real_words_scanned_limit - + 3 * words_scanned_period / 4; + _refs_reached_limit = _real_refs_reached_limit - + 3 * refs_reached_period / 4; +} + +void CMTask::move_entries_to_global_stack() { + // local array where we'll store the entries that will be popped + // from the local queue + oop buffer[global_stack_transfer_size]; + + int n = 0; + oop obj; + while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { + buffer[n] = obj; + ++n; + } + + if (n > 0) { + // we popped at least one entry from the local queue + + statsOnly( ++_global_transfers_to; _local_pops += n ); + + if (!_cm->mark_stack_push(buffer, n)) { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id); + set_has_aborted(); + } else { + // the transfer was successful + + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack", + _task_id, n); + statsOnly( int tmp_size = _cm->mark_stack_size(); + if (tmp_size > _global_max_size) + _global_max_size = tmp_size; + _global_pushes += n ); + } + } + + // this operation was quite expensive, so decrease the limits + decrease_limits(); +} + +void CMTask::get_entries_from_global_stack() { + // local array where we'll store the entries that will be popped + // from the global stack. + oop buffer[global_stack_transfer_size]; + int n; + _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); + tmp_guarantee_CM( n <= global_stack_transfer_size, + "we should not pop more than the given limit" ); + if (n > 0) { + // yes, we did actually pop at least one entry + + statsOnly( ++_global_transfers_from; _global_pops += n ); + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] popped %d entries from the global stack", + _task_id, n); + for (int i = 0; i < n; ++i) { + bool success = _task_queue->push(buffer[i]); + // We only call this when the local queue is empty or under a + // given target limit. So, we do not expect this push to fail. + tmp_guarantee_CM( success, "invariant" ); + } + + statsOnly( int tmp_size = _task_queue->size(); + if (tmp_size > _local_max_size) + _local_max_size = tmp_size; + _local_pushes += n ); + } + + // this operation was quite expensive, so decrease the limits + decrease_limits(); +} + +void CMTask::drain_local_queue(bool partially) { + if (has_aborted()) + return; + + // Decide what the target size is, depending whether we're going to + // drain it partially (so that other tasks can steal if they run out + // of things to do) or totally (at the very end). 
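+  // (For instance, with an assumed queue capacity of 16K entries and an
+  // assumed GCDrainStackTargetSize of 64, the partial target is
+  // MIN2(16384 / 3, 64) == 64: the queue is drained down to roughly 64
+  // entries, which remain available for stealing by other tasks.)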
+ size_t target_size; + if (partially) + target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); + else + target_size = 0; + + if (_task_queue->size() > target_size) { + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] draining local queue, target size = %d", + _task_id, target_size); + + oop obj; + bool ret = _task_queue->pop_local(obj); + while (ret) { + statsOnly( ++_local_pops ); + + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id, + (void*) obj); + + tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj), + "invariant" ); + + scan_object(obj); + + if (_task_queue->size() <= target_size || has_aborted()) + ret = false; + else + ret = _task_queue->pop_local(obj); + } + + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] drained local queue, size = %d", + _task_id, _task_queue->size()); + } +} + +void CMTask::drain_global_stack(bool partially) { + if (has_aborted()) + return; + + // We have a policy to drain the local queue before we attempt to + // drain the global stack. + tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" ); + + // Decide what the target size is, depending whether we're going to + // drain it partially (so that other tasks can steal if they run out + // of things to do) or totally (at the very end). Notice that, + // because we move entries from the global stack in chunks or + // because another task might be doing the same, we might in fact + // drop below the target. But, this is not a problem. + size_t target_size; + if (partially) + target_size = _cm->partial_mark_stack_size_target(); + else + target_size = 0; + + if (_cm->mark_stack_size() > target_size) { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] draining global_stack, target size %d", + _task_id, target_size); + + while (!has_aborted() && _cm->mark_stack_size() > target_size) { + get_entries_from_global_stack(); + drain_local_queue(partially); + } + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] drained global stack, size = %d", + _task_id, _cm->mark_stack_size()); + } +} + +// SATB Queue has several assumptions on whether to call the par or +// non-par versions of the methods. this is why some of the code is +// replicated. We should really get rid of the single-threaded version +// of the code to simplify things. +void CMTask::drain_satb_buffers() { + if (has_aborted()) + return; + + // We set this so that the regular clock knows that we're in the + // middle of draining buffers and doesn't set the abort flag when it + // notices that SATB buffers are available for draining. It'd be + // very counter productive if it did that. :-) + _draining_satb_buffers = true; + + CMObjectClosure oc(this); + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + if (ParallelGCThreads > 0) + satb_mq_set.set_par_closure(_task_id, &oc); + else + satb_mq_set.set_closure(&oc); + + // This keeps claiming and applying the closure to completed buffers + // until we run out of buffers or we need to abort. 
+ if (ParallelGCThreads > 0) { + while (!has_aborted() && + satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); + statsOnly( ++_satb_buffers_processed ); + regular_clock_call(); + } + } else { + while (!has_aborted() && + satb_mq_set.apply_closure_to_completed_buffer()) { + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); + statsOnly( ++_satb_buffers_processed ); + regular_clock_call(); + } + } + + if (!concurrent() && !has_aborted()) { + // We should only do this during remark. + if (ParallelGCThreads > 0) + satb_mq_set.par_iterate_closure_all_threads(_task_id); + else + satb_mq_set.iterate_closure_all_threads(); + } + + _draining_satb_buffers = false; + + tmp_guarantee_CM( has_aborted() || + concurrent() || + satb_mq_set.completed_buffers_num() == 0, "invariant" ); + + if (ParallelGCThreads > 0) + satb_mq_set.set_par_closure(_task_id, NULL); + else + satb_mq_set.set_closure(NULL); + + // again, this was a potentially expensive operation, decrease the + // limits to get the regular clock call early + decrease_limits(); +} + +void CMTask::drain_region_stack(BitMapClosure* bc) { + if (has_aborted()) + return; + + tmp_guarantee_CM( _region_finger == NULL, + "it should be NULL when we're not scanning a region" ); + + if (!_cm->region_stack_empty()) { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] draining region stack, size = %d", + _task_id, _cm->region_stack_size()); + + MemRegion mr = _cm->region_stack_pop(); + // it returns MemRegion() if the pop fails + statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); + + while (mr.start() != NULL) { + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] we are scanning region " + "["PTR_FORMAT", "PTR_FORMAT")", + _task_id, mr.start(), mr.end()); + tmp_guarantee_CM( mr.end() <= _cm->finger(), + "otherwise the region shouldn't be on the stack" ); + assert(!mr.is_empty(), "Only non-empty regions live on the region stack"); + if (_nextMarkBitMap->iterate(bc, mr)) { + tmp_guarantee_CM( !has_aborted(), + "cannot abort the task without aborting the bitmap iteration" ); + + // We finished iterating over the region without aborting. + regular_clock_call(); + if (has_aborted()) + mr = MemRegion(); + else { + mr = _cm->region_stack_pop(); + // it returns MemRegion() if the pop fails + statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); + } + } else { + guarantee( has_aborted(), "currently the only way to do so" ); + + // The only way to abort the bitmap iteration is to return + // false from the do_bit() method. However, inside the + // do_bit() method we move the _region_finger to point to the + // object currently being looked at. So, if we bail out, we + // have definitely set _region_finger to something non-null. + guarantee( _region_finger != NULL, "invariant" ); + + // The iteration was actually aborted. So now _region_finger + // points to the address of the object we last scanned. If we + // leave it there, when we restart this task, we will rescan + // the object. It is easy to avoid this. We move the finger by + // enough to point to the next possible object header (the + // bitmap knows by how much we need to move it as it knows its + // granularity). 
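+      // (Illustration with made-up addresses: if the popped region was
+      // [0x1000, 0x4000) and the iteration aborted with _region_finger at
+      // 0x2000, the unscanned remainder [nextWord(0x2000), 0x4000) is pushed
+      // back below so that some task eventually finishes it.)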
+ MemRegion newRegion = + MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end()); + + if (!newRegion.is_empty()) { + if (_cm->verbose_low()) { + gclog_or_tty->print_cr("[%d] pushing unscanned region" + "[" PTR_FORMAT "," PTR_FORMAT ") on region stack", + _task_id, + newRegion.start(), newRegion.end()); + } + // Now push the part of the region we didn't scan on the + // region stack to make sure a task scans it later. + _cm->region_stack_push(newRegion); + } + // break from while + mr = MemRegion(); + } + _region_finger = NULL; + } + + // We only push regions on the region stack during evacuation + // pauses. So if we come out the above iteration because we region + // stack is empty, it will remain empty until the next yield + // point. So, the guarantee below is safe. + guarantee( has_aborted() || _cm->region_stack_empty(), + "only way to exit the loop" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] drained region stack, size = %d", + _task_id, _cm->region_stack_size()); + } +} + +void CMTask::print_stats() { + gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", + _task_id, _calls); + gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", + _elapsed_time_ms, _termination_time_ms); + gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", + _step_times_ms.num(), _step_times_ms.avg(), + _step_times_ms.sd()); + gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", + _step_times_ms.maximum(), _step_times_ms.sum()); + +#if _MARKING_STATS_ + gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", + _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), + _all_clock_intervals_ms.sd()); + gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", + _all_clock_intervals_ms.maximum(), + _all_clock_intervals_ms.sum()); + gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", + _clock_due_to_scanning, _clock_due_to_marking); + gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", + _objs_scanned, _objs_found_on_bitmap); + gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", + _local_pushes, _local_pops, _local_max_size); + gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", + _global_pushes, _global_pops, _global_max_size); + gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", + _global_transfers_to,_global_transfers_from); + gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d", + _regions_claimed, _region_stack_pops); + gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); + gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", + _steal_attempts, _steals); + gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); + gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", + _aborted_overflow, _aborted_cm_aborted, _aborted_yield); + gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", + _aborted_timed_out, _aborted_satb, _aborted_termination); +#endif // _MARKING_STATS_ +} + +/***************************************************************************** + + The do_marking_step(time_target_ms) method is the building block + of the parallel marking framework. 
It can be called in parallel
+ with other invocations of do_marking_step() on different tasks
+ (but only one per task, obviously) and concurrently with the
+ mutator threads, or during remark, hence it eliminates the need
+ for two versions of the code. When called during remark, it will
+ pick up from where the task left off during the concurrent marking
+ phase. Interestingly, tasks are also claimable during evacuation
+ pauses, since do_marking_step() ensures that it aborts before
+ it needs to yield.
+
+ The data structures that it uses to do marking work are the
+ following:
+
+ (1) Marking Bitmap. If there are gray objects that appear only
+ on the bitmap (this happens either when dealing with an overflow
+ or when the initial marking phase has simply marked the roots
+ and didn't push them on the stack), then tasks claim heap
+ regions whose bitmap they then scan to find gray objects. A
+ global finger indicates where the end of the last claimed region
+ is. A local finger indicates how far into the region a task has
+ scanned. The two fingers are used to determine how to gray an
+ object (i.e. whether simply marking it is OK, as it will be
+ visited by a task in the future, or whether it also needs to be
+ pushed on a stack).
+
+ (2) Local Queue. The local queue of the task, which is accessed
+ reasonably efficiently by the task. Other tasks can steal from
+ it when they run out of work. Throughout the marking phase, a
+ task attempts to keep its local queue short but not totally
+ empty, so that entries are available for stealing by other
+ tasks. Only when there is no more work will a task totally
+ drain its local queue.
+
+ (3) Global Mark Stack. This handles local queue overflow. During
+ marking only sets of entries are moved between it and the local
+ queues, as access to it requires a mutex and more fine-grained
+ interaction with it which might cause contention. If it
+ overflows, then the marking phase should restart and iterate
+ over the bitmap to identify gray objects. Throughout the marking
+ phase, tasks attempt to keep the global mark stack at a small
+ length but not totally empty, so that entries are available for
+ popping by other tasks. Only when there is no more work will tasks
+ totally drain the global mark stack.
+
+ (4) Global Region Stack. Entries on it correspond to areas of
+ the bitmap that need to be scanned since they contain gray
+ objects. Pushes on the region stack only happen during
+ evacuation pauses and typically correspond to areas covered by
+ GC LABs. If it overflows, then the marking phase should restart
+ and iterate over the bitmap to identify gray objects. Tasks will
+ try to totally drain the region stack as soon as possible.
+
+ (5) SATB Buffer Queue. This is where completed SATB buffers are
+ made available. Buffers are regularly removed from this queue
+ and scanned for roots, so that the queue doesn't get too
+ long. During remark, all completed buffers are processed, as
+ well as the filled-in parts of any uncompleted buffers.
+
+ The do_marking_step() method tries to abort when the time target
+ has been reached. There are a few other cases when the
+ do_marking_step() method also aborts:
+
+ (1) When the marking phase has been aborted (after a Full GC).
+
+ (2) When a global overflow (either on the global stack or the
+ region stack) has been triggered. Before the task aborts, it
+ will actually sync up with the other tasks to ensure that all
+ the marking data structures (local queues, stacks, fingers etc.)
+ are re-initialised so that when do_marking_step() completes,
+ the marking phase can immediately restart.
+
+ (3) When enough completed SATB buffers are available. The
+ do_marking_step() method only tries to drain SATB buffers right
+ at the beginning. So, if enough buffers are available, the
+ marking step aborts and the SATB buffers are processed at
+ the beginning of the next invocation.
+
+ (4) To yield. When we have to yield, we abort and do the yield
+ right at the end of do_marking_step(). This saves us a lot
+ of hassle as, by yielding, we might allow a Full GC. If this
+ happens then objects will be compacted underneath our feet, the
+ heap might shrink, etc. We save checking for this by just
+ aborting and doing the yield right at the end.
+
+ From the above it follows that the do_marking_step() method should
+ be called in a loop (or, otherwise, regularly) until it completes.
+
+ If a marking step completes without its has_aborted() flag being
+ true, it means it has completed the current marking phase (and
+ also all other marking tasks have done so and have all synced up).
+
+ A method called regular_clock_call() is invoked "regularly" (at
+ sub-ms intervals) throughout marking. It is this clock method that
+ checks all the abort conditions which were mentioned above and
+ decides when the task should abort. A work-based scheme is used to
+ trigger this clock method: it is called whenever the number of object
+ words the marking phase has scanned, or the number of references the
+ marking phase has visited, reaches a given limit. Additional invocations
+ of the clock method have been planted in a few other strategic places
+ too. The initial reason for the clock method was to avoid calling
+ vtime too regularly, as it is quite expensive. So, once it was in
+ place, it was natural to piggy-back all the other conditions on it
+ too and not constantly check them throughout the code.
+
+ *****************************************************************************/
+
+void CMTask::do_marking_step(double time_target_ms) {
+  guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
+  guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
+
+  guarantee( concurrent() || _cm->region_stack_empty(),
+             "the region stack should have been cleared before remark" );
+  guarantee( _region_finger == NULL,
+             "this should be non-null only when a region is being scanned" );
+
+  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
+  guarantee( _task_queues != NULL, "invariant" );
+  guarantee( _task_queue != NULL, "invariant" );
+  guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
+
+  guarantee( !_claimed,
+             "only one thread should claim this task at any one time" );
+
+  // OK, this doesn't safeguard against all possible scenarios, as it is
+  // possible for two threads to set the _claimed flag at the same
+  // time. But it is only for debugging purposes anyway and it will
+  // catch most problems.
+ _claimed = true; + + _start_time_ms = os::elapsedVTime() * 1000.0; + statsOnly( _interval_start_time_ms = _start_time_ms ); + + double diff_prediction_ms = + g1_policy->get_new_prediction(&_marking_step_diffs_ms); + _time_target_ms = time_target_ms - diff_prediction_ms; + + // set up the variables that are used in the work-based scheme to + // call the regular clock method + _words_scanned = 0; + _refs_reached = 0; + recalculate_limits(); + + // clear all flags + clear_has_aborted(); + _has_aborted_timed_out = false; + _draining_satb_buffers = false; + + ++_calls; + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, " + "target = %1.2lfms >>>>>>>>>>", + _task_id, _calls, _time_target_ms); + + // Set up the bitmap and oop closures. Anything that uses them is + // eventually called from this method, so it is OK to allocate these + // statically. + CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); + CMOopClosure oop_closure(_g1h, _cm, this); + set_oop_closure(&oop_closure); + + if (_cm->has_overflown()) { + // This can happen if the region stack or the mark stack overflows + // during a GC pause and this task, after a yield point, + // restarts. We have to abort as we need to get into the overflow + // protocol which happens right at the end of this task. + set_has_aborted(); + } + + // First drain any available SATB buffers. After this, we will not + // look at SATB buffers before the next invocation of this method. + // If enough completed SATB buffers are queued up, the regular clock + // will abort this task so that it restarts. + drain_satb_buffers(); + // ...then partially drain the local queue and the global stack + drain_local_queue(true); + drain_global_stack(true); + + // Then totally drain the region stack. We will not look at + // it again before the next invocation of this method. Entries on + // the region stack are only added during evacuation pauses, for + // which we have to yield. When we do, we abort the task anyway so + // it will look at the region stack again when it restarts. + bitmap_closure.set_scanning_heap_region(false); + drain_region_stack(&bitmap_closure); + // ...then partially drain the local queue and the global stack + drain_local_queue(true); + drain_global_stack(true); + + do { + if (!has_aborted() && _curr_region != NULL) { + // This means that we're already holding on to a region. + tmp_guarantee_CM( _finger != NULL, + "if region is not NULL, then the finger " + "should not be NULL either" ); + + // We might have restarted this task after an evacuation pause + // which might have evacuated the region we're holding on to + // underneath our feet. Let's read its limit again to make sure + // that we do not iterate over a region of the heap that + // contains garbage (update_region_limit() will also move + // _finger to the start of the region if it is found empty). + update_region_limit(); + // We will start from _finger not from the start of the region, + // as we might be restarting this task after aborting half-way + // through scanning this region. In this case, _finger points to + // the address where we last found a marked object. If this is a + // fresh region, _finger points to start(). 
+ MemRegion mr = MemRegion(_finger, _region_limit); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] we're scanning part " + "["PTR_FORMAT", "PTR_FORMAT") " + "of region "PTR_FORMAT, + _task_id, _finger, _region_limit, _curr_region); + + // Let's iterate over the bitmap of the part of the + // region that is left. + bitmap_closure.set_scanning_heap_region(true); + if (mr.is_empty() || + _nextMarkBitMap->iterate(&bitmap_closure, mr)) { + // We successfully completed iterating over the region. Now, + // let's give up the region. + giveup_current_region(); + regular_clock_call(); + } else { + guarantee( has_aborted(), "currently the only way to do so" ); + // The only way to abort the bitmap iteration is to return + // false from the do_bit() method. However, inside the + // do_bit() method we move the _finger to point to the + // object currently being looked at. So, if we bail out, we + // have definitely set _finger to something non-null. + guarantee( _finger != NULL, "invariant" ); + + // Region iteration was actually aborted. So now _finger + // points to the address of the object we last scanned. If we + // leave it there, when we restart this task, we will rescan + // the object. It is easy to avoid this. We move the finger by + // enough to point to the next possible object header (the + // bitmap knows by how much we need to move it as it knows its + // granularity). + move_finger_to(_nextMarkBitMap->nextWord(_finger)); + } + } + // At this point we have either completed iterating over the + // region we were holding on to, or we have aborted. + + // We then partially drain the local queue and the global stack. + // (Do we really need this?) + drain_local_queue(true); + drain_global_stack(true); + + // Read the note on the claim_region() method on why it might + // return NULL with potentially more regions available for + // claiming and why we have to check out_of_regions() to determine + // whether we're done or not. + while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { + // We are going to try to claim a new region. We should have + // given up on the previous one. + tmp_guarantee_CM( _curr_region == NULL && + _finger == NULL && + _region_limit == NULL, "invariant" ); + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); + HeapRegion* claimed_region = _cm->claim_region(_task_id); + if (claimed_region != NULL) { + // Yes, we managed to claim one + statsOnly( ++_regions_claimed ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] we successfully claimed " + "region "PTR_FORMAT, + _task_id, claimed_region); + + setup_for_region(claimed_region); + tmp_guarantee_CM( _curr_region == claimed_region, "invariant" ); + } + // It is important to call the regular clock here. It might take + // a while to claim a region if, for example, we hit a large + // block of empty regions. So we need to call the regular clock + // method once round the loop to make sure it's called + // frequently enough. + regular_clock_call(); + } + + if (!has_aborted() && _curr_region == NULL) { + tmp_guarantee_CM( _cm->out_of_regions(), + "at this point we should be out of regions" ); + } + } while ( _curr_region != NULL && !has_aborted()); + + if (!has_aborted()) { + // We cannot check whether the global stack is empty, since other + // tasks might be pushing objects to it concurrently. 
+ tmp_guarantee_CM( _cm->out_of_regions() && _cm->region_stack_empty(), + "at this point we should be out of regions" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] all regions claimed", _task_id); + + // Try to reduce the number of available SATB buffers so that + // remark has less work to do. + drain_satb_buffers(); + } + + // Since we've done everything else, we can now totally drain the + // local queue and global stack. + drain_local_queue(false); + drain_global_stack(false); + + // Attempt at work stealing from other task's queues. + if (!has_aborted()) { + // We have not aborted. This means that we have finished all that + // we could. Let's try to do some stealing... + + // We cannot check whether the global stack is empty, since other + // tasks might be pushing objects to it concurrently. + guarantee( _cm->out_of_regions() && + _cm->region_stack_empty() && + _task_queue->size() == 0, "only way to reach here" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] starting to steal", _task_id); + + while (!has_aborted()) { + oop obj; + statsOnly( ++_steal_attempts ); + + if (_cm->try_stealing(_task_id, &_hash_seed, obj)) { + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully", + _task_id, (void*) obj); + + statsOnly( ++_steals ); + + tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj), + "any stolen object should be marked" ); + scan_object(obj); + + // And since we're towards the end, let's totally drain the + // local queue and global stack. + drain_local_queue(false); + drain_global_stack(false); + } else { + break; + } + } + } + + // We still haven't aborted. Now, let's try to get into the + // termination protocol. + if (!has_aborted()) { + // We cannot check whether the global stack is empty, since other + // tasks might be concurrently pushing objects on it. + guarantee( _cm->out_of_regions() && + _cm->region_stack_empty() && + _task_queue->size() == 0, "only way to reach here" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id); + + _termination_start_time_ms = os::elapsedVTime() * 1000.0; + // The CMTask class also extends the TerminatorTerminator class, + // hence its should_exit_termination() method will also decide + // whether to exit the termination protocol or not. + bool finished = _cm->terminator()->offer_termination(this); + double termination_end_time_ms = os::elapsedVTime() * 1000.0; + _termination_time_ms += + termination_end_time_ms - _termination_start_time_ms; + + if (finished) { + // We're all done. + + if (_task_id == 0) { + // let's allow task 0 to do this + if (concurrent()) { + guarantee( _cm->concurrent_marking_in_progress(), "invariant" ); + // we need to set this to false before the next + // safepoint. This way we ensure that the marking phase + // doesn't observe any more heap expansions. + _cm->clear_concurrent_marking_in_progress(); + } + } + + // We can now guarantee that the global stack is empty, since + // all other tasks have finished. + guarantee( _cm->out_of_regions() && + _cm->region_stack_empty() && + _cm->mark_stack_empty() && + _task_queue->size() == 0 && + !_cm->has_overflown() && + !_cm->mark_stack_overflow() && + !_cm->region_stack_overflow(), + "only way to reach here" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id); + } else { + // Apparently there's more work to do. Let's abort this task. It + // will restart it and we can hopefully find more things to do. 
+ + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id); + + set_has_aborted(); + statsOnly( ++_aborted_termination ); + } + } + + // Mainly for debugging purposes to make sure that a pointer to the + // closure which was statically allocated in this frame doesn't + // escape it by accident. + set_oop_closure(NULL); + double end_time_ms = os::elapsedVTime() * 1000.0; + double elapsed_time_ms = end_time_ms - _start_time_ms; + // Update the step history. + _step_times_ms.add(elapsed_time_ms); + + if (has_aborted()) { + // The task was aborted for some reason. + + statsOnly( ++_aborted ); + + if (_has_aborted_timed_out) { + double diff_ms = elapsed_time_ms - _time_target_ms; + // Keep statistics of how well we did with respect to hitting + // our target only if we actually timed out (if we aborted for + // other reasons, then the results might get skewed). + _marking_step_diffs_ms.add(diff_ms); + } + + if (_cm->has_overflown()) { + // This is the interesting one. We aborted because a global + // overflow was raised. This means we have to restart the + // marking phase and start iterating over regions. However, in + // order to do this we have to make sure that all tasks stop + // what they are doing and re-initialise in a safe manner. We + // will achieve this with the use of two barrier sync points. + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] detected overflow", _task_id); + + _cm->enter_first_sync_barrier(_task_id); + // When we exit this sync barrier we know that all tasks have + // stopped doing marking work. So, it's now safe to + // re-initialise our data structures. At the end of this method, + // task 0 will clear the global data structures. + + statsOnly( ++_aborted_overflow ); + + // We clear the local state of this task... + clear_region_fields(); + + // ...and enter the second barrier. + _cm->enter_second_sync_barrier(_task_id); + // At this point everything has bee re-initialised and we're + // ready to restart. + } + + if (_cm->verbose_low()) { + gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, " + "elapsed = %1.2lfms <<<<<<<<<<", + _task_id, _time_target_ms, elapsed_time_ms); + if (_cm->has_aborted()) + gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========", + _task_id); + } + } else { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, " + "elapsed = %1.2lfms <<<<<<<<<<", + _task_id, _time_target_ms, elapsed_time_ms); + } + + _claimed = false; +} + +CMTask::CMTask(int task_id, + ConcurrentMark* cm, + CMTaskQueue* task_queue, + CMTaskQueueSet* task_queues) + : _g1h(G1CollectedHeap::heap()), + _co_tracker(G1CMGroup), + _task_id(task_id), _cm(cm), + _claimed(false), + _nextMarkBitMap(NULL), _hash_seed(17), + _task_queue(task_queue), + _task_queues(task_queues), + _oop_closure(NULL) { + guarantee( task_queue != NULL, "invariant" ); + guarantee( task_queues != NULL, "invariant" ); + + statsOnly( _clock_due_to_scanning = 0; + _clock_due_to_marking = 0 ); + + _marking_step_diffs_ms.add(0.5); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentMark.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,1049 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class G1CollectedHeap; +class CMTask; +typedef GenericTaskQueue CMTaskQueue; +typedef GenericTaskQueueSet CMTaskQueueSet; + +// A generic CM bit map. This is essentially a wrapper around the BitMap +// class, with one bit per (1<<_shifter) HeapWords. + +class CMBitMapRO { + protected: + HeapWord* _bmStartWord; // base address of range covered by map + size_t _bmWordSize; // map size (in #HeapWords covered) + const int _shifter; // map to char or bit + VirtualSpace _virtual_space; // underlying the bit map + BitMap _bm; // the bit map itself + + public: + // constructor + CMBitMapRO(ReservedSpace rs, int shifter); + + enum { do_yield = true }; + + // inquiries + HeapWord* startWord() const { return _bmStartWord; } + size_t sizeInWords() const { return _bmWordSize; } + // the following is one past the last word in space + HeapWord* endWord() const { return _bmStartWord + _bmWordSize; } + + // read marks + + bool isMarked(HeapWord* addr) const { + assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize), + "outside underlying space?"); + return _bm.at(heapWordToOffset(addr)); + } + + // iteration + bool iterate(BitMapClosure* cl) { return _bm.iterate(cl); } + bool iterate(BitMapClosure* cl, MemRegion mr); + + // Return the address corresponding to the next marked bit at or after + // "addr", and before "limit", if "limit" is non-NULL. If there is no + // such bit, returns "limit" if that is non-NULL, or else "endWord()". + HeapWord* getNextMarkedWordAddress(HeapWord* addr, + HeapWord* limit = NULL) const; + // Return the address corresponding to the next unmarked bit at or after + // "addr", and before "limit", if "limit" is non-NULL. If there is no + // such bit, returns "limit" if that is non-NULL, or else "endWord()". + HeapWord* getNextUnmarkedWordAddress(HeapWord* addr, + HeapWord* limit = NULL) const; + + // conversion utilities + // XXX Fix these so that offsets are size_t's... 
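+  // (Illustration, assuming _shifter == 0, i.e. one bit per HeapWord:
+  //  heapWordToOffset(_bmStartWord + 17) == 17 and
+  //  offsetToHeapWord(17) == _bmStartWord + 17; with _shifter == 1 each
+  //  bit would cover two HeapWords instead.)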
+ HeapWord* offsetToHeapWord(size_t offset) const { + return _bmStartWord + (offset << _shifter); + } + size_t heapWordToOffset(HeapWord* addr) const { + return pointer_delta(addr, _bmStartWord) >> _shifter; + } + int heapWordDiffToOffsetDiff(size_t diff) const; + HeapWord* nextWord(HeapWord* addr) { + return offsetToHeapWord(heapWordToOffset(addr) + 1); + } + + void mostly_disjoint_range_union(BitMap* from_bitmap, + size_t from_start_index, + HeapWord* to_start_word, + size_t word_num); + + // debugging + NOT_PRODUCT(bool covers(ReservedSpace rs) const;) +}; + +class CMBitMap : public CMBitMapRO { + + public: + // constructor + CMBitMap(ReservedSpace rs, int shifter) : + CMBitMapRO(rs, shifter) {} + + // write marks + void mark(HeapWord* addr) { + assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize), + "outside underlying space?"); + _bm.at_put(heapWordToOffset(addr), true); + } + void clear(HeapWord* addr) { + assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize), + "outside underlying space?"); + _bm.at_put(heapWordToOffset(addr), false); + } + bool parMark(HeapWord* addr) { + assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize), + "outside underlying space?"); + return _bm.par_at_put(heapWordToOffset(addr), true); + } + bool parClear(HeapWord* addr) { + assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize), + "outside underlying space?"); + return _bm.par_at_put(heapWordToOffset(addr), false); + } + void markRange(MemRegion mr); + void clearAll(); + void clearRange(MemRegion mr); + + // Starting at the bit corresponding to "addr" (inclusive), find the next + // "1" bit, if any. This bit starts some run of consecutive "1"'s; find + // the end of this run (stopping at "end_addr"). Return the MemRegion + // covering from the start of the region corresponding to the first bit + // of the run to the end of the region corresponding to the last bit of + // the run. If there is no "1" bit at or after "addr", return an empty + // MemRegion. + MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr); +}; + +// Represents a marking stack used by the CM collector. +// Ideally this should be GrowableArray<> just like MSC's marking stack(s). +class CMMarkStack { + ConcurrentMark* _cm; + oop* _base; // bottom of stack + jint _index; // one more than last occupied index + jint _capacity; // max #elements + jint _oops_do_bound; // Number of elements to include in next iteration. + NOT_PRODUCT(jint _max_depth;) // max depth plumbed during run + + bool _overflow; + DEBUG_ONLY(bool _drain_in_progress;) + DEBUG_ONLY(bool _drain_in_progress_yields;) + + public: + CMMarkStack(ConcurrentMark* cm); + ~CMMarkStack(); + + void allocate(size_t size); + + oop pop() { + if (!isEmpty()) { + return _base[--_index] ; + } + return NULL; + } + + // If overflow happens, don't do the push, and record the overflow. + // *Requires* that "ptr" is already marked. + void push(oop ptr) { + if (isFull()) { + // Record overflow. + _overflow = true; + return; + } else { + _base[_index++] = ptr; + NOT_PRODUCT(_max_depth = MAX2(_max_depth, _index)); + } + } + // Non-block impl. Note: concurrency is allowed only with other + // "par_push" operations, not with "pop" or "drain". We would need + // parallel versions of them if such concurrency was desired. + void par_push(oop ptr); + + // Pushes the first "n" elements of "ptr_arr" on the stack. + // Non-block impl. 
Note: concurrency is allowed only with other + // "par_adjoin_arr" or "push" operations, not with "pop" or "drain". + void par_adjoin_arr(oop* ptr_arr, int n); + + // Pushes the first "n" elements of "ptr_arr" on the stack. + // Locking impl: concurrency is allowed only with + // "par_push_arr" and/or "par_pop_arr" operations, which use the same + // locking strategy. + void par_push_arr(oop* ptr_arr, int n); + + // If returns false, the array was empty. Otherwise, removes up to "max" + // elements from the stack, and transfers them to "ptr_arr" in an + // unspecified order. The actual number transferred is given in "n" ("n + // == 0" is deliberately redundant with the return value.) Locking impl: + // concurrency is allowed only with "par_push_arr" and/or "par_pop_arr" + // operations, which use the same locking strategy. + bool par_pop_arr(oop* ptr_arr, int max, int* n); + + // Drain the mark stack, applying the given closure to all fields of + // objects on the stack. (That is, continue until the stack is empty, + // even if closure applications add entries to the stack.) The "bm" + // argument, if non-null, may be used to verify that only marked objects + // are on the mark stack. If "yield_after" is "true", then the + // concurrent marker performing the drain offers to yield after + // processing each object. If a yield occurs, stops the drain operation + // and returns false. Otherwise, returns true. + template + bool drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after = false); + + bool isEmpty() { return _index == 0; } + bool isFull() { return _index == _capacity; } + int maxElems() { return _capacity; } + + bool overflow() { return _overflow; } + void clear_overflow() { _overflow = false; } + + int size() { return _index; } + + void setEmpty() { _index = 0; clear_overflow(); } + + // Record the current size; a subsequent "oops_do" will iterate only over + // indices valid at the time of this call. + void set_oops_do_bound(jint bound = -1) { + if (bound == -1) { + _oops_do_bound = _index; + } else { + _oops_do_bound = bound; + } + } + jint oops_do_bound() { return _oops_do_bound; } + // iterate over the oops in the mark stack, up to the bound recorded via + // the call above. + void oops_do(OopClosure* f); +}; + +class CMRegionStack { + MemRegion* _base; + jint _capacity; + jint _index; + jint _oops_do_bound; + bool _overflow; +public: + CMRegionStack(); + ~CMRegionStack(); + void allocate(size_t size); + + // This is lock-free; assumes that it will only be called in parallel + // with other "push" operations (no pops). + void push(MemRegion mr); + + // Lock-free; assumes that it will only be called in parallel + // with other "pop" operations (no pushes). + MemRegion pop(); + + bool isEmpty() { return _index == 0; } + bool isFull() { return _index == _capacity; } + + bool overflow() { return _overflow; } + void clear_overflow() { _overflow = false; } + + int size() { return _index; } + + // It iterates over the entries in the region stack and it + // invalidates (i.e. assigns MemRegion()) the ones that point to + // regions in the collection set. + bool invalidate_entries_into_cset(); + + // This gives an upper bound up to which the iteration in + // invalidate_entries_into_cset() will reach. This prevents + // newly-added entries to be unnecessarily scanned. 
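The set_oops_do_bound() idea used by both the mark stack and the region stack is: snapshot the current index, and have a later iteration visit only entries that existed at snapshot time, ignoring anything pushed afterwards. A minimal sketch of that bounded-iteration pattern, with a std::vector standing in for the stack and all names invented for the sketch:

#include <cstdio>
#include <vector>

struct BoundedStack {
  std::vector<int> elems;
  std::size_t bound = 0;

  void push(int v) { elems.push_back(v); }

  // Record the current size; a later for_each_bounded() ignores newer pushes.
  void set_bound() { bound = elems.size(); }

  template <typename F>
  void for_each_bounded(F f) const {
    for (std::size_t i = 0; i < bound; ++i) f(elems[i]);
  }
};

int main() {
  BoundedStack s;
  s.push(1); s.push(2);
  s.set_bound();          // snapshot: two entries
  s.push(3);              // added later, must not be visited
  s.for_each_bounded([](int v) { std::printf("%d\n", v); });  // prints 1 and 2
  return 0;
}
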
+ void set_oops_do_bound() { + _oops_do_bound = _index; + } + + void setEmpty() { _index = 0; clear_overflow(); } +}; + +// this will enable a variety of different statistics per GC task +#define _MARKING_STATS_ 0 +// this will enable the higher verbose levels +#define _MARKING_VERBOSE_ 0 + +#if _MARKING_STATS_ +#define statsOnly(statement) \ +do { \ + statement ; \ +} while (0) +#else // _MARKING_STATS_ +#define statsOnly(statement) \ +do { \ +} while (0) +#endif // _MARKING_STATS_ + +// Some extra guarantees that I like to also enable in optimised mode +// when debugging. If you want to enable them, comment out the assert +// macro and uncomment out the guaratee macro +// #define tmp_guarantee_CM(expr, str) guarantee(expr, str) +#define tmp_guarantee_CM(expr, str) assert(expr, str) + +typedef enum { + no_verbose = 0, // verbose turned off + stats_verbose, // only prints stats at the end of marking + low_verbose, // low verbose, mostly per region and per major event + medium_verbose, // a bit more detailed than low + high_verbose // per object verbose +} CMVerboseLevel; + + +class ConcurrentMarkThread; + +class ConcurrentMark { + friend class ConcurrentMarkThread; + friend class CMTask; + friend class CMBitMapClosure; + friend class CSMarkOopClosure; + friend class CMGlobalObjectClosure; + friend class CMRemarkTask; + friend class CMConcurrentMarkingTask; + friend class G1ParNoteEndTask; + friend class CalcLiveObjectsClosure; + +protected: + ConcurrentMarkThread* _cmThread; // the thread doing the work + G1CollectedHeap* _g1h; // the heap. + size_t _parallel_marking_threads; // the number of marking + // threads we'll use + double _sleep_factor; // how much we have to sleep, with + // respect to the work we just did, to + // meet the marking overhead goal + double _marking_task_overhead; // marking target overhead for + // a single task + + // same as the two above, but for the cleanup task + double _cleanup_sleep_factor; + double _cleanup_task_overhead; + + // Stuff related to age cohort processing. + struct ParCleanupThreadState { + char _pre[64]; + UncleanRegionList list; + char _post[64]; + }; + ParCleanupThreadState** _par_cleanup_thread_state; + + // CMS marking support structures + CMBitMap _markBitMap1; + CMBitMap _markBitMap2; + CMBitMapRO* _prevMarkBitMap; // completed mark bitmap + CMBitMap* _nextMarkBitMap; // under-construction mark bitmap + bool _at_least_one_mark_complete; + + BitMap _region_bm; + BitMap _card_bm; + + // Heap bounds + HeapWord* _heap_start; + HeapWord* _heap_end; + + // For gray objects + CMMarkStack _markStack; // Grey objects behind global finger. + CMRegionStack _regionStack; // Grey regions behind global finger. + HeapWord* volatile _finger; // the global finger, region aligned, + // always points to the end of the + // last claimed region + + // marking tasks + size_t _max_task_num; // maximum task number + size_t _active_tasks; // task num currently active + CMTask** _tasks; // task queue array (max_task_num len) + CMTaskQueueSet* _task_queues; // task queue set + ParallelTaskTerminator _terminator; // for termination + + // Two sync barriers that are used to synchronise tasks when an + // overflow occurs. The algorithm is the following. All tasks enter + // the first one to ensure that they have all stopped manipulating + // the global data structures. After they exit it, they re-initialise + // their data structures and task 0 re-initialises the global data + // structures. Then, they enter the second sync barrier. 
This + // ensure, that no task starts doing work before all data + // structures (local and global) have been re-initialised. When they + // exit it, they are free to start working again. + WorkGangBarrierSync _first_overflow_barrier_sync; + WorkGangBarrierSync _second_overflow_barrier_sync; + + + // this is set by any task, when an overflow on the global data + // structures is detected. + volatile bool _has_overflown; + // true: marking is concurrent, false: we're in remark + volatile bool _concurrent; + // set at the end of a Full GC so that marking aborts + volatile bool _has_aborted; + // used when remark aborts due to an overflow to indicate that + // another concurrent marking phase should start + volatile bool _restart_for_overflow; + + // This is true from the very start of concurrent marking until the + // point when all the tasks complete their work. It is really used + // to determine the points between the end of concurrent marking and + // time of remark. + volatile bool _concurrent_marking_in_progress; + + // verbose level + CMVerboseLevel _verbose_level; + + COTracker _cleanup_co_tracker; + + // These two fields are used to implement the optimisation that + // avoids pushing objects on the global/region stack if there are + // no collection set regions above the lowest finger. + + // This is the lowest finger (among the global and local fingers), + // which is calculated before a new collection set is chosen. + HeapWord* _min_finger; + // If this flag is true, objects/regions that are marked below the + // finger should be pushed on the stack(s). If this is flag is + // false, it is safe not to push them on the stack(s). + bool _should_gray_objects; + + // All of these times are in ms. + NumberSeq _init_times; + NumberSeq _remark_times; + NumberSeq _remark_mark_times; + NumberSeq _remark_weak_ref_times; + NumberSeq _cleanup_times; + double _total_counting_time; + double _total_rs_scrub_time; + + double* _accum_task_vtime; // accumulated task vtime + + WorkGang* _parallel_workers; + + void weakRefsWork(bool clear_all_soft_refs); + + void swapMarkBitMaps(); + + // It resets the global marking data structures, as well as the + // task local ones; should be called during initial mark. + void reset(); + // It resets all the marking data structures. + void clear_marking_state(); + + // It should be called to indicate which phase we're in (concurrent + // mark or remark) and how many threads are currently active. + void set_phase(size_t active_tasks, bool concurrent); + // We do this after we're done with marking so that the marking data + // structures are initialised to a sensible and predictable state. + void set_non_marking_state(); + + // prints all gathered CM-related statistics + void print_stats(); + + // accessor methods + size_t parallel_marking_threads() { return _parallel_marking_threads; } + double sleep_factor() { return _sleep_factor; } + double marking_task_overhead() { return _marking_task_overhead;} + double cleanup_sleep_factor() { return _cleanup_sleep_factor; } + double cleanup_task_overhead() { return _cleanup_task_overhead;} + + HeapWord* finger() { return _finger; } + bool concurrent() { return _concurrent; } + size_t active_tasks() { return _active_tasks; } + ParallelTaskTerminator* terminator() { return &_terminator; } + + // It claims the next available region to be scanned by a marking + // task. It might return NULL if the next region is empty or we have + // run out of regions. 
In the latter case, out_of_regions() + // determines whether we've really run out of regions or the task + // should call claim_region() again. This might seem a bit + // awkward. Originally, the code was written so that claim_region() + // either successfully returned with a non-empty region or there + // were no more regions to be claimed. The problem with this was + // that, in certain circumstances, it iterated over large chunks of + // the heap finding only empty regions and, while it was working, it + // was preventing the calling task to call its regular clock + // method. So, this way, each task will spend very little time in + // claim_region() and is allowed to call the regular clock method + // frequently. + HeapRegion* claim_region(int task); + + // It determines whether we've run out of regions to scan. + bool out_of_regions() { return _finger == _heap_end; } + + // Returns the task with the given id + CMTask* task(int id) { + guarantee( 0 <= id && id < (int) _active_tasks, "task id not within " + "active bounds" ); + return _tasks[id]; + } + + // Returns the task queue with the given id + CMTaskQueue* task_queue(int id) { + guarantee( 0 <= id && id < (int) _active_tasks, "task queue id not within " + "active bounds" ); + return (CMTaskQueue*) _task_queues->queue(id); + } + + // Returns the task queue set + CMTaskQueueSet* task_queues() { return _task_queues; } + + // Access / manipulation of the overflow flag which is set to + // indicate that the global stack or region stack has overflown + bool has_overflown() { return _has_overflown; } + void set_has_overflown() { _has_overflown = true; } + void clear_has_overflown() { _has_overflown = false; } + + bool has_aborted() { return _has_aborted; } + bool restart_for_overflow() { return _restart_for_overflow; } + + // Methods to enter the two overflow sync barriers + void enter_first_sync_barrier(int task_num); + void enter_second_sync_barrier(int task_num); + +public: + // Manipulation of the global mark stack. + // Notice that the first mark_stack_push is CAS-based, whereas the + // two below are Mutex-based. This is OK since the first one is only + // called during evacuation pauses and doesn't compete with the + // other two (which are called by the marking tasks during + // concurrent marking or remark). 
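The two overflow barriers declared above (enter_first_sync_barrier / enter_second_sync_barrier) give the protocol its shape: everybody stops, everybody resets local state, one task resets the shared state, then everybody resumes. A rough analogue of that ordering using C++20 std::barrier; WorkGangBarrierSync is the HotSpot class actually used, and every name in the sketch is invented:

#include <barrier>
#include <cstdio>
#include <thread>
#include <vector>

int main() {
  const int num_tasks = 4;
  std::barrier first_sync(num_tasks);   // all tasks have stopped marking
  std::barrier second_sync(num_tasks);  // all re-initialisation is done
  std::vector<int> global_state(16, 1); // stands in for the global mark data

  auto task = [&](int id) {
    // ... marking work detects an overflow and aborts ...
    first_sync.arrive_and_wait();       // nobody is touching shared state now
    // every task resets its own local state here
    if (id == 0) {
      global_state.assign(global_state.size(), 0);  // task 0 resets shared state
    }
    second_sync.arrive_and_wait();      // safe to restart marking after this
    std::printf("task %d restarting\n", id);
  };

  std::vector<std::thread> threads;
  for (int i = 0; i < num_tasks; ++i) threads.emplace_back(task, i);
  for (auto& t : threads) t.join();
  return 0;
}
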
+ bool mark_stack_push(oop p) { + _markStack.par_push(p); + if (_markStack.overflow()) { + set_has_overflown(); + return false; + } + return true; + } + bool mark_stack_push(oop* arr, int n) { + _markStack.par_push_arr(arr, n); + if (_markStack.overflow()) { + set_has_overflown(); + return false; + } + return true; + } + void mark_stack_pop(oop* arr, int max, int* n) { + _markStack.par_pop_arr(arr, max, n); + } + size_t mark_stack_size() { return _markStack.size(); } + size_t partial_mark_stack_size_target() { return _markStack.maxElems()/3; } + bool mark_stack_overflow() { return _markStack.overflow(); } + bool mark_stack_empty() { return _markStack.isEmpty(); } + + // Manipulation of the region stack + bool region_stack_push(MemRegion mr) { + _regionStack.push(mr); + if (_regionStack.overflow()) { + set_has_overflown(); + return false; + } + return true; + } + MemRegion region_stack_pop() { return _regionStack.pop(); } + int region_stack_size() { return _regionStack.size(); } + bool region_stack_overflow() { return _regionStack.overflow(); } + bool region_stack_empty() { return _regionStack.isEmpty(); } + + bool concurrent_marking_in_progress() { + return _concurrent_marking_in_progress; + } + void set_concurrent_marking_in_progress() { + _concurrent_marking_in_progress = true; + } + void clear_concurrent_marking_in_progress() { + _concurrent_marking_in_progress = false; + } + + void update_accum_task_vtime(int i, double vtime) { + _accum_task_vtime[i] += vtime; + } + + double all_task_accum_vtime() { + double ret = 0.0; + for (int i = 0; i < (int)_max_task_num; ++i) + ret += _accum_task_vtime[i]; + return ret; + } + + // Attempts to steal an object from the task queues of other tasks + bool try_stealing(int task_num, int* hash_seed, oop& obj) { + return _task_queues->steal(task_num, hash_seed, obj); + } + + // It grays an object by first marking it. Then, if it's behind the + // global finger, it also pushes it on the global stack. + void deal_with_reference(oop obj); + + ConcurrentMark(ReservedSpace rs, int max_regions); + ~ConcurrentMark(); + ConcurrentMarkThread* cmThread() { return _cmThread; } + + CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; } + CMBitMap* nextMarkBitMap() const { return _nextMarkBitMap; } + + // The following three are interaction between CM and + // G1CollectedHeap + + // This notifies CM that a root during initial-mark needs to be + // grayed and it's MT-safe. Currently, we just mark it. But, in the + // future, we can experiment with pushing it on the stack and we can + // do this without changing G1CollectedHeap. + void grayRoot(oop p); + // It's used during evacuation pauses to gray a region, if + // necessary, and it's MT-safe. It assumes that the caller has + // marked any objects on that region. If _should_gray_objects is + // true and we're still doing concurrent marking, the region is + // pushed on the region stack, if it is located below the global + // finger, otherwise we do nothing. + void grayRegionIfNecessary(MemRegion mr); + // It's used during evacuation pauses to mark and, if necessary, + // gray a single object and it's MT-safe. It assumes the caller did + // not mark the object. If _should_gray_objects is true and we're + // still doing concurrent marking, the objects is pushed on the + // global stack, if it is located below the global finger, otherwise + // we do nothing. 
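deal_with_reference and the gray*IfNecessary methods share one rule: mark the object, and only push it for later scanning if it lies below the global finger, since anything at or above the finger will be reached by the normal region scan. A compressed sketch of that decision, with plain integers standing in for heap addresses and all names invented:

#include <cstdio>
#include <set>
#include <vector>

// Addresses are plain integers in this sketch; "finger" is the scan frontier.
struct MiniMarker {
  std::set<long> marked;
  std::vector<long> gray_stack;   // objects that still need explicit scanning
  long finger;                    // everything >= finger will be scanned anyway

  void deal_with_reference(long obj) {
    if (!marked.insert(obj).second) return;   // already marked: nothing to do
    if (obj < finger) {
      gray_stack.push_back(obj);  // below the frontier: would be missed, so push
    }
    // at or above the finger: the regular region scan will visit it
  }
};

int main() {
  MiniMarker m;
  m.finger = 100;
  m.deal_with_reference(42);    // pushed: below the finger
  m.deal_with_reference(150);   // only marked: the scan will get there
  std::printf("gray stack size = %zu\n", m.gray_stack.size());  // prints 1
  return 0;
}
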
+ void markAndGrayObjectIfNecessary(oop p); + + // This iterates over the bitmap of the previous marking and prints + // out all objects that are marked on the bitmap and indicates + // whether what they point to is also marked or not. + void print_prev_bitmap_reachable(); + + // Clear the next marking bitmap (will be called concurrently). + void clearNextBitmap(); + + // main CMS steps and related support + void checkpointRootsInitial(); + + // These two do the work that needs to be done before and after the + // initial root checkpoint. Since this checkpoint can be done at two + // different points (i.e. an explicit pause or piggy-backed on a + // young collection), then it's nice to be able to easily share the + // pre/post code. It might be the case that we can put everything in + // the post method. TP + void checkpointRootsInitialPre(); + void checkpointRootsInitialPost(); + + // Do concurrent phase of marking, to a tentative transitive closure. + void markFromRoots(); + + // Process all unprocessed SATB buffers. It is called at the + // beginning of an evacuation pause. + void drainAllSATBBuffers(); + + void checkpointRootsFinal(bool clear_all_soft_refs); + void checkpointRootsFinalWork(); + void calcDesiredRegions(); + void cleanup(); + void completeCleanup(); + + // Mark in the previous bitmap. NB: this is usually read-only, so use + // this carefully! + void markPrev(oop p); + void clear(oop p); + // Clears marks for all objects in the given range, for both prev and + // next bitmaps. NB: the previous bitmap is usually read-only, so use + // this carefully! + void clearRangeBothMaps(MemRegion mr); + + // Record the current top of the mark and region stacks; a + // subsequent oops_do() on the mark stack and + // invalidate_entries_into_cset() on the region stack will iterate + // only over indices valid at the time of this call. + void set_oops_do_bound() { + _markStack.set_oops_do_bound(); + _regionStack.set_oops_do_bound(); + } + // Iterate over the oops in the mark stack and all local queues. It + // also calls invalidate_entries_into_cset() on the region stack. + void oops_do(OopClosure* f); + // It is called at the end of an evacuation pause during marking so + // that CM is notified of where the new end of the heap is. It + // doesn't do anything if concurrent_marking_in_progress() is false, + // unless the force parameter is true. + void update_g1_committed(bool force = false); + + void complete_marking_in_collection_set(); + + // It indicates that a new collection set is being chosen. + void newCSet(); + // It registers a collection set heap region with CM. This is used + // to determine whether any heap regions are located above the finger. + void registerCSetRegion(HeapRegion* hr); + + // Returns "true" if at least one mark has been completed. 
+ bool at_least_one_mark_complete() { return _at_least_one_mark_complete; } + + bool isMarked(oop p) const { + assert(p != NULL && p->is_oop(), "expected an oop"); + HeapWord* addr = (HeapWord*)p; + assert(addr >= _nextMarkBitMap->startWord() || + addr < _nextMarkBitMap->endWord(), "in a region"); + + return _nextMarkBitMap->isMarked(addr); + } + + inline bool not_yet_marked(oop p) const; + + // XXX Debug code + bool containing_card_is_marked(void* p); + bool containing_cards_are_marked(void* start, void* last); + + bool isPrevMarked(oop p) const { + assert(p != NULL && p->is_oop(), "expected an oop"); + HeapWord* addr = (HeapWord*)p; + assert(addr >= _prevMarkBitMap->startWord() || + addr < _prevMarkBitMap->endWord(), "in a region"); + + return _prevMarkBitMap->isMarked(addr); + } + + inline bool do_yield_check(int worker_i = 0); + inline bool should_yield(); + + // Called to abort the marking cycle after a Full GC takes palce. + void abort(); + + void disable_co_trackers(); + + // This prints the global/local fingers. It is used for debugging. + NOT_PRODUCT(void print_finger();) + + void print_summary_info(); + + // The following indicate whether a given verbose level has been + // set. Notice that anything above stats is conditional to + // _MARKING_VERBOSE_ having been set to 1 + bool verbose_stats() + { return _verbose_level >= stats_verbose; } + bool verbose_low() + { return _MARKING_VERBOSE_ && _verbose_level >= low_verbose; } + bool verbose_medium() + { return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose; } + bool verbose_high() + { return _MARKING_VERBOSE_ && _verbose_level >= high_verbose; } +}; + +// A class representing a marking task. +class CMTask : public TerminatorTerminator { +private: + enum PrivateConstants { + // the regular clock call is called once the scanned words reaches + // this limit + words_scanned_period = 12*1024, + // the regular clock call is called once the number of visited + // references reaches this limit + refs_reached_period = 384, + // initial value for the hash seed, used in the work stealing code + init_hash_seed = 17, + // how many entries will be transferred between global stack and + // local queues + global_stack_transfer_size = 16 + }; + + int _task_id; + G1CollectedHeap* _g1h; + ConcurrentMark* _cm; + CMBitMap* _nextMarkBitMap; + // the task queue of this task + CMTaskQueue* _task_queue; + // the task queue set---needed for stealing + CMTaskQueueSet* _task_queues; + // indicates whether the task has been claimed---this is only for + // debugging purposes + bool _claimed; + + // number of calls to this task + int _calls; + + // concurrent overhead over a single CPU for this task + COTracker _co_tracker; + + // when the virtual timer reaches this time, the marking step should + // exit + double _time_target_ms; + // the start time of the current marking step + double _start_time_ms; + + // the oop closure used for iterations over oops + OopClosure* _oop_closure; + + // the region this task is scanning, NULL if we're not scanning any + HeapRegion* _curr_region; + // the local finger of this task, NULL if we're not scanning a region + HeapWord* _finger; + // limit of the region this task is scanning, NULL if we're not scanning one + HeapWord* _region_limit; + + // This is used only when we scan regions popped from the region + // stack. It records what the last object on such a region we + // scanned was. It is used to ensure that, if we abort region + // iteration, we do not rescan the first part of the region. 
This + // should be NULL when we're not scanning a region from the region + // stack. + HeapWord* _region_finger; + + // the number of words this task has scanned + size_t _words_scanned; + // When _words_scanned reaches this limit, the regular clock is + // called. Notice that this might be decreased under certain + // circumstances (i.e. when we believe that we did an expensive + // operation). + size_t _words_scanned_limit; + // the initial value of _words_scanned_limit (i.e. what it was + // before it was decreased). + size_t _real_words_scanned_limit; + + // the number of references this task has visited + size_t _refs_reached; + // When _refs_reached reaches this limit, the regular clock is + // called. Notice this this might be decreased under certain + // circumstances (i.e. when we believe that we did an expensive + // operation). + size_t _refs_reached_limit; + // the initial value of _refs_reached_limit (i.e. what it was before + // it was decreased). + size_t _real_refs_reached_limit; + + // used by the work stealing stuff + int _hash_seed; + // if this is true, then the task has aborted for some reason + bool _has_aborted; + // set when the task aborts because it has met its time quota + bool _has_aborted_timed_out; + // true when we're draining SATB buffers; this avoids the task + // aborting due to SATB buffers being available (as we're already + // dealing with them) + bool _draining_satb_buffers; + + // number sequence of past step times + NumberSeq _step_times_ms; + // elapsed time of this task + double _elapsed_time_ms; + // termination time of this task + double _termination_time_ms; + // when this task got into the termination protocol + double _termination_start_time_ms; + + // true when the task is during a concurrent phase, false when it is + // in the remark phase (so, in the latter case, we do not have to + // check all the things that we have to check during the concurrent + // phase, i.e. SATB buffer availability...) 
+ bool _concurrent; + + TruncatedSeq _marking_step_diffs_ms; + + // LOTS of statistics related with this task +#if _MARKING_STATS_ + NumberSeq _all_clock_intervals_ms; + double _interval_start_time_ms; + + int _aborted; + int _aborted_overflow; + int _aborted_cm_aborted; + int _aborted_yield; + int _aborted_timed_out; + int _aborted_satb; + int _aborted_termination; + + int _steal_attempts; + int _steals; + + int _clock_due_to_marking; + int _clock_due_to_scanning; + + int _local_pushes; + int _local_pops; + int _local_max_size; + int _objs_scanned; + + int _global_pushes; + int _global_pops; + int _global_max_size; + + int _global_transfers_to; + int _global_transfers_from; + + int _region_stack_pops; + + int _regions_claimed; + int _objs_found_on_bitmap; + + int _satb_buffers_processed; +#endif // _MARKING_STATS_ + + // it updates the local fields after this task has claimed + // a new region to scan + void setup_for_region(HeapRegion* hr); + // it brings up-to-date the limit of the region + void update_region_limit(); + // it resets the local fields after a task has finished scanning a + // region + void giveup_current_region(); + + // called when either the words scanned or the refs visited limit + // has been reached + void reached_limit(); + // recalculates the words scanned and refs visited limits + void recalculate_limits(); + // decreases the words scanned and refs visited limits when we reach + // an expensive operation + void decrease_limits(); + // it checks whether the words scanned or refs visited reached their + // respective limit and calls reached_limit() if they have + void check_limits() { + if (_words_scanned >= _words_scanned_limit || + _refs_reached >= _refs_reached_limit) + reached_limit(); + } + // this is supposed to be called regularly during a marking step as + // it checks a bunch of conditions that might cause the marking step + // to abort + void regular_clock_call(); + bool concurrent() { return _concurrent; } + +public: + // It resets the task; it should be called right at the beginning of + // a marking phase. + void reset(CMBitMap* _nextMarkBitMap); + // it clears all the fields that correspond to a claimed region. + void clear_region_fields(); + + void set_concurrent(bool concurrent) { _concurrent = concurrent; } + + void enable_co_tracker() { + guarantee( !_co_tracker.enabled(), "invariant" ); + _co_tracker.enable(); + } + void disable_co_tracker() { + guarantee( _co_tracker.enabled(), "invariant" ); + _co_tracker.disable(); + } + bool co_tracker_enabled() { + return _co_tracker.enabled(); + } + void reset_co_tracker(double starting_conc_overhead = 0.0) { + _co_tracker.reset(starting_conc_overhead); + } + void start_co_tracker() { + _co_tracker.start(); + } + void update_co_tracker(bool force_end = false) { + _co_tracker.update(force_end); + } + + // The main method of this class which performs a marking step + // trying not to exceed the given duration. However, it might exit + // prematurely, according to some conditions (i.e. SATB buffers are + // available for processing). + void do_marking_step(double target_ms); + + // These two calls start and stop the timer + void record_start_time() { + _elapsed_time_ms = os::elapsedTime() * 1000.0; + } + void record_end_time() { + _elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms; + } + + // returns the task ID + int task_id() { return _task_id; } + + // From TerminatorTerminator. It determines whether this task should + // exit the termination protocol after it's entered it. 
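check_limits() is the cheap hot-path test: counters for words scanned and references visited are compared against their limits, and only when a limit is hit does the task fall into the more expensive regular clock call that decides whether to abort the step. A standalone sketch of that pattern (names invented; the real limits are also recalculated and sometimes decreased, which the sketch omits):

#include <cstddef>
#include <cstdio>

struct MiniTask {
  std::size_t words_scanned = 0, words_limit = 12 * 1024;
  std::size_t refs_reached = 0, refs_limit = 384;
  int clock_calls = 0;

  void regular_clock_call() {
    // In the real code this checks elapsed time, SATB buffers, yields, etc.
    ++clock_calls;
    words_limit = words_scanned + 12 * 1024;  // set up the next period
    refs_limit  = refs_reached  + 384;
  }

  // Cheap check performed after every object scanned / reference visited.
  void check_limits() {
    if (words_scanned >= words_limit || refs_reached >= refs_limit)
      regular_clock_call();
  }

  void scan_object(std::size_t size_in_words) {
    words_scanned += size_in_words;
    check_limits();
  }
};

int main() {
  MiniTask t;
  for (int i = 0; i < 1000; ++i) t.scan_object(64);  // 64000 words scanned
  std::printf("regular clock invoked %d times\n", t.clock_calls);
  return 0;
}
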
+ virtual bool should_exit_termination(); + + HeapWord* finger() { return _finger; } + + bool has_aborted() { return _has_aborted; } + void set_has_aborted() { _has_aborted = true; } + void clear_has_aborted() { _has_aborted = false; } + bool claimed() { return _claimed; } + + void set_oop_closure(OopClosure* oop_closure) { + _oop_closure = oop_closure; + } + + // It grays the object by marking it and, if necessary, pushing it + // on the local queue + void deal_with_reference(oop obj); + + // It scans an object and visits its children. + void scan_object(oop obj) { + tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj), + "invariant" ); + + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT, + _task_id, (void*) obj); + + size_t obj_size = obj->size(); + _words_scanned += obj_size; + + obj->oop_iterate(_oop_closure); + statsOnly( ++_objs_scanned ); + check_limits(); + } + + // It pushes an object on the local queue. + void push(oop obj); + + // These two move entries to/from the global stack. + void move_entries_to_global_stack(); + void get_entries_from_global_stack(); + + // It pops and scans objects from the local queue. If partially is + // true, then it stops when the queue size is of a given limit. If + // partially is false, then it stops when the queue is empty. + void drain_local_queue(bool partially); + // It moves entries from the global stack to the local queue and + // drains the local queue. If partially is true, then it stops when + // both the global stack and the local queue reach a given size. If + // partially if false, it tries to empty them totally. + void drain_global_stack(bool partially); + // It keeps picking SATB buffers and processing them until no SATB + // buffers are available. + void drain_satb_buffers(); + // It keeps popping regions from the region stack and processing + // them until the region stack is empty. + void drain_region_stack(BitMapClosure* closure); + + // moves the local finger to a new location + inline void move_finger_to(HeapWord* new_finger) { + tmp_guarantee_CM( new_finger >= _finger && new_finger < _region_limit, + "invariant" ); + _finger = new_finger; + } + + // moves the region finger to a new location + inline void move_region_finger_to(HeapWord* new_finger) { + tmp_guarantee_CM( new_finger < _cm->finger(), "invariant" ); + _region_finger = new_finger; + } + + CMTask(int task_num, ConcurrentMark *cm, + CMTaskQueue* task_queue, CMTaskQueueSet* task_queues); + + // it prints statistics associated with this task + void print_stats(); + +#if _MARKING_STATS_ + void increase_objs_found_on_bitmap() { ++_objs_found_on_bitmap; } +#endif // _MARKING_STATS_ +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,336 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_concurrentMarkThread.cpp.incl" + +// ======= Concurrent Mark Thread ======== + +// The CM thread is created when the G1 garbage collector is used + +SurrogateLockerThread* + ConcurrentMarkThread::_slt = NULL; + +ConcurrentMarkThread::ConcurrentMarkThread(ConcurrentMark* cm) : + ConcurrentGCThread(), + _cm(cm), + _started(false), + _in_progress(false), + _vtime_accum(0.0), + _vtime_mark_accum(0.0), + _vtime_count_accum(0.0) +{ + create_and_start(); +} + +class CMCheckpointRootsInitialClosure: public VoidClosure { + + ConcurrentMark* _cm; +public: + + CMCheckpointRootsInitialClosure(ConcurrentMark* cm) : + _cm(cm) {} + + void do_void(){ + _cm->checkpointRootsInitial(); + } +}; + +class CMCheckpointRootsFinalClosure: public VoidClosure { + + ConcurrentMark* _cm; +public: + + CMCheckpointRootsFinalClosure(ConcurrentMark* cm) : + _cm(cm) {} + + void do_void(){ + _cm->checkpointRootsFinal(false); // !clear_all_soft_refs + } +}; + +class CMCleanUp: public VoidClosure { + ConcurrentMark* _cm; +public: + + CMCleanUp(ConcurrentMark* cm) : + _cm(cm) {} + + void do_void(){ + _cm->cleanup(); + } +}; + + + +void ConcurrentMarkThread::run() { + initialize_in_thread(); + _vtime_start = os::elapsedVTime(); + wait_for_universe_init(); + + G1CollectedHeap* g1 = G1CollectedHeap::heap(); + G1CollectorPolicy* g1_policy = g1->g1_policy(); + G1MMUTracker *mmu_tracker = g1_policy->mmu_tracker(); + Thread *current_thread = Thread::current(); + + while (!_should_terminate) { + // wait until started is set. + sleepBeforeNextCycle(); + { + ResourceMark rm; + HandleMark hm; + double cycle_start = os::elapsedVTime(); + double mark_start_sec = os::elapsedTime(); + char verbose_str[128]; + + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + tty->print_cr("[GC concurrent-mark-start]"); + } + + if (!g1_policy->in_young_gc_mode()) { + // this ensures the flag is not set if we bail out of the marking + // cycle; normally the flag is cleared immediately after cleanup + g1->set_marking_complete(); + + if (g1_policy->adaptive_young_list_length()) { + double now = os::elapsedTime(); + double init_prediction_ms = g1_policy->predict_init_time_ms(); + jlong sleep_time_ms = mmu_tracker->when_ms(now, init_prediction_ms); + os::sleep(current_thread, sleep_time_ms, false); + } + + // We don't have to skip here if we've been asked to restart, because + // in the worst case we just enqueue a new VM operation to start a + // marking. 
Note that the init operation resets has_aborted() + CMCheckpointRootsInitialClosure init_cl(_cm); + strcpy(verbose_str, "GC initial-mark"); + VM_CGC_Operation op(&init_cl, verbose_str); + VMThread::execute(&op); + } + + int iter = 0; + do { + iter++; + if (!cm()->has_aborted()) { + _cm->markFromRoots(); + } else { + if (TraceConcurrentMark) + gclog_or_tty->print_cr("CM-skip-mark-from-roots"); + } + + double mark_end_time = os::elapsedVTime(); + double mark_end_sec = os::elapsedTime(); + _vtime_mark_accum += (mark_end_time - cycle_start); + if (!cm()->has_aborted()) { + if (g1_policy->adaptive_young_list_length()) { + double now = os::elapsedTime(); + double remark_prediction_ms = g1_policy->predict_remark_time_ms(); + jlong sleep_time_ms = mmu_tracker->when_ms(now, remark_prediction_ms); + os::sleep(current_thread, sleep_time_ms, false); + } + + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-mark-end, %1.7lf sec]", + mark_end_sec - mark_start_sec); + } + + CMCheckpointRootsFinalClosure final_cl(_cm); + sprintf(verbose_str, "GC remark"); + VM_CGC_Operation op(&final_cl, verbose_str); + VMThread::execute(&op); + } else { + if (TraceConcurrentMark) + gclog_or_tty->print_cr("CM-skip-remark"); + } + if (cm()->restart_for_overflow() && + G1TraceMarkStackOverflow) { + gclog_or_tty->print_cr("Restarting conc marking because of MS overflow " + "in remark (restart #%d).", iter); + } + + if (cm()->restart_for_overflow()) { + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-mark-restart-for-overflow]"); + } + } + } while (cm()->restart_for_overflow()); + double counting_start_time = os::elapsedVTime(); + + // YSR: These look dubious (i.e. redundant) !!! FIX ME + slt()->manipulatePLL(SurrogateLockerThread::acquirePLL); + slt()->manipulatePLL(SurrogateLockerThread::releaseAndNotifyPLL); + + if (!cm()->has_aborted()) { + double count_start_sec = os::elapsedTime(); + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-count-start]"); + } + + _sts.join(); + _cm->calcDesiredRegions(); + _sts.leave(); + + if (!cm()->has_aborted()) { + double count_end_sec = os::elapsedTime(); + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-count-end, %1.7lf]", + count_end_sec - count_start_sec); + } + } + } else { + if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-skip-end-game"); + } + double end_time = os::elapsedVTime(); + _vtime_count_accum += (end_time - counting_start_time); + // Update the total virtual time before doing this, since it will try + // to measure it to get the vtime for this marking. We purposely + // neglect the presumably-short "completeCleanup" phase here. 
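Stripped of the GC plumbing, the concurrent phase above is a retry loop: mark from roots, run the remark pause, and if remark raised restart_for_overflow() (the global structures overflowed), start another round of marking. A sketch of just that control flow; every name below is a stand-in, not the HotSpot API:

#include <cstdio>

struct MiniCM {
  int rounds_left_with_overflow = 0;   // pretend the first N rounds overflow
  bool restart_for_overflow = false;

  void mark_from_roots() { /* concurrent marking work */ }

  void remark() {
    // The remark pause detects whether the mark/region stacks overflowed.
    restart_for_overflow = (rounds_left_with_overflow-- > 0);
  }
};

int main() {
  MiniCM cm;
  cm.rounds_left_with_overflow = 2;
  int iter = 0;
  do {
    ++iter;
    cm.mark_from_roots();
    cm.remark();
    if (cm.restart_for_overflow)
      std::printf("restarting marking because of overflow (restart #%d)\n", iter);
  } while (cm.restart_for_overflow);
  std::printf("marking finished after %d iteration(s)\n", iter);
  return 0;
}
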
+ _vtime_accum = (end_time - _vtime_start); + if (!cm()->has_aborted()) { + if (g1_policy->adaptive_young_list_length()) { + double now = os::elapsedTime(); + double cleanup_prediction_ms = g1_policy->predict_cleanup_time_ms(); + jlong sleep_time_ms = mmu_tracker->when_ms(now, cleanup_prediction_ms); + os::sleep(current_thread, sleep_time_ms, false); + } + + CMCleanUp cl_cl(_cm); + sprintf(verbose_str, "GC cleanup"); + VM_CGC_Operation op(&cl_cl, verbose_str); + VMThread::execute(&op); + } else { + if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-skip-cleanup"); + G1CollectedHeap::heap()->set_marking_complete(); + } + + if (!cm()->has_aborted()) { + double cleanup_start_sec = os::elapsedTime(); + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-cleanup-start]"); + } + + // Now do the remainder of the cleanup operation. + _sts.join(); + _cm->completeCleanup(); + if (!cm()->has_aborted()) { + g1_policy->record_concurrent_mark_cleanup_completed(); + + double cleanup_end_sec = os::elapsedTime(); + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-cleanup-end, %1.7lf]", + cleanup_end_sec - cleanup_start_sec); + } + } + _sts.leave(); + } + // We're done: no more unclean regions coming. + G1CollectedHeap::heap()->set_unclean_regions_coming(false); + + if (cm()->has_aborted()) { + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-mark-abort]"); + } + } + + _sts.join(); + _cm->disable_co_trackers(); + _sts.leave(); + + // we now want to allow clearing of the marking bitmap to be + // suspended by a collection pause. + _sts.join(); + _cm->clearNextBitmap(); + _sts.leave(); + } + } + assert(_should_terminate, "just checking"); + + terminate(); +} + + +void ConcurrentMarkThread::yield() { + if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-yield"); + _sts.yield("Concurrent Mark"); + if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-yield-end"); +} + +void ConcurrentMarkThread::stop() { + // it is ok to take late safepoints here, if needed + MutexLockerEx mu(Terminator_lock); + _should_terminate = true; + while (!_has_terminated) { + Terminator_lock->wait(); + } + if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-stop"); +} + +void ConcurrentMarkThread::print() { + gclog_or_tty->print("\"Concurrent Mark GC Thread\" "); + Thread::print(); + gclog_or_tty->cr(); +} + +void ConcurrentMarkThread::sleepBeforeNextCycle() { + clear_in_progress(); + // We join here because we don't want to do the "shouldConcurrentMark()" + // below while the world is otherwise stopped. + MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); + while (!started()) { + if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-sleeping"); + CGC_lock->wait(Mutex::_no_safepoint_check_flag); + } + set_in_progress(); + clear_started(); + if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-starting"); + + return; +} + +// Note: this method, although exported by the ConcurrentMarkSweepThread, +// which is a non-JavaThread, can only be called by a JavaThread. +// Currently this is done at vm creation time (post-vm-init) by the +// main/Primordial (Java)Thread. +// XXX Consider changing this in the future to allow the CMS thread +// itself to create this thread? 
+void ConcurrentMarkThread::makeSurrogateLockerThread(TRAPS) { + assert(_slt == NULL, "SLT already created"); + _slt = SurrogateLockerThread::make(THREAD); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,84 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// The Concurrent Mark GC Thread (could be several in the future). +// This is copied from the Concurrent Mark Sweep GC Thread +// Still under construction. + +class ConcurrentMark; + +class ConcurrentMarkThread: public ConcurrentGCThread { + friend class VMStructs; + + double _vtime_start; // Initial virtual time. + double _vtime_accum; // Accumulated virtual time. + + double _vtime_mark_accum; + double _vtime_count_accum; + + public: + virtual void run(); + + private: + ConcurrentMark* _cm; + bool _started; + bool _in_progress; + + void sleepBeforeNextCycle(); + + static SurrogateLockerThread* _slt; + + public: + // Constructor + ConcurrentMarkThread(ConcurrentMark* cm); + + static void makeSurrogateLockerThread(TRAPS); + static SurrogateLockerThread* slt() { return _slt; } + + // Printing + void print(); + + // Total virtual time so far. + double vtime_accum(); + // Marking virtual time so far + double vtime_mark_accum(); + // Counting virtual time so far. + double vtime_count_accum() { return _vtime_count_accum; } + + ConcurrentMark* cm() { return _cm; } + + void set_started() { _started = true; } + void clear_started() { _started = false; } + bool started() { return _started; } + + void set_in_progress() { _in_progress = true; } + void clear_in_progress() { _in_progress = false; } + bool in_progress() { return _in_progress; } + + // Yield for GC + void yield(); + + // shutdown + static void stop(); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentMarkThread.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,33 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + + // Total virtual time so far. +inline double ConcurrentMarkThread::vtime_accum() { + return _vtime_accum + _cm->all_task_accum_vtime(); +} + +// Marking virtual time so far +inline double ConcurrentMarkThread::vtime_mark_accum() { + return _vtime_mark_accum + _cm->all_task_accum_vtime(); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentZFThread.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentZFThread.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,191 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_concurrentZFThread.cpp.incl" + +// ======= Concurrent Zero-Fill Thread ======== + +// The CM thread is created when the G1 garbage collector is used + +int ConcurrentZFThread::_region_allocs = 0; +int ConcurrentZFThread::_sync_zfs = 0; +int ConcurrentZFThread::_zf_waits = 0; +int ConcurrentZFThread::_regions_filled = 0; + +ConcurrentZFThread::ConcurrentZFThread() : + ConcurrentGCThread(), + _co_tracker(G1ZFGroup) +{ + create_and_start(); +} + +void ConcurrentZFThread::wait_for_ZF_completed(HeapRegion* hr) { + assert(ZF_mon->owned_by_self(), "Precondition."); + note_zf_wait(); + while (hr->zero_fill_state() == HeapRegion::ZeroFilling) { + ZF_mon->wait(Mutex::_no_safepoint_check_flag); + } +} + +void ConcurrentZFThread::processHeapRegion(HeapRegion* hr) { + assert(!Universe::heap()->is_gc_active(), + "This should not happen during GC."); + assert(hr != NULL, "Precondition"); + // These are unlocked reads, but if this test is successful, then no + // other thread will attempt this zero filling. 
Only a GC thread can + // modify the ZF state of a region whose state is zero-filling, and this + // should only happen while the ZF thread is locking out GC. + if (hr->zero_fill_state() == HeapRegion::ZeroFilling + && hr->zero_filler() == Thread::current()) { + assert(hr->top() == hr->bottom(), "better be empty!"); + assert(!hr->isHumongous(), "Only free regions on unclean list."); + Copy::fill_to_words(hr->bottom(), hr->capacity()/HeapWordSize); + note_region_filled(); + } +} + +void ConcurrentZFThread::run() { + initialize_in_thread(); + Thread* thr_self = Thread::current(); + _vtime_start = os::elapsedVTime(); + wait_for_universe_init(); + _co_tracker.enable(); + _co_tracker.start(); + + G1CollectedHeap* g1 = G1CollectedHeap::heap(); + _sts.join(); + while (!_should_terminate) { + _sts.leave(); + + { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + + // This local variable will hold a region being zero-filled. This + // region will neither be on the unclean or zero-filled lists, and + // will not be available for allocation; thus, we might have an + // allocation fail, causing a full GC, because of this, but this is a + // price we will pay. (In future, we might want to make the fact + // that there's a region being zero-filled apparent to the G1 heap, + // which could then wait for it in this extreme case...) + HeapRegion* to_fill; + + while (!g1->should_zf() + || (to_fill = g1->pop_unclean_region_list_locked()) == NULL) + ZF_mon->wait(Mutex::_no_safepoint_check_flag); + while (to_fill->zero_fill_state() == HeapRegion::ZeroFilling) + ZF_mon->wait(Mutex::_no_safepoint_check_flag); + + // So now to_fill is non-NULL and is not ZeroFilling. It might be + // Allocated or ZeroFilled. (The latter could happen if this thread + // starts the zero-filling of a region, but a GC intervenes and + // pushes new regions needing on the front of the filling on the + // front of the list.) 
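The zero-fill loop that follows uses a common shape: claim the region's state as "in progress" under ZF_mon, release the monitor for the long fill, then re-acquire it and re-check that this thread is still the designated filler before publishing the region as zero-filled, since a GC may have intervened. A generic sketch of the same shape with std::mutex; all names are invented and HeapRegion's actual state machine has more states:

#include <algorithm>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

enum class FillState { NotFilled, Filling, Filled };

struct Region {
  FillState state = FillState::NotFilled;
  std::thread::id filler;        // which thread claimed the fill
  std::vector<char> payload = std::vector<char>(1 << 20);
};

std::mutex region_mon;

void fill_region(Region& r) {
  std::unique_lock<std::mutex> lk(region_mon);
  if (r.state != FillState::NotFilled) return;
  r.state = FillState::Filling;          // claim the work under the lock
  r.filler = std::this_thread::get_id();

  lk.unlock();                           // do the long operation unlocked
  std::fill(r.payload.begin(), r.payload.end(), 0);
  lk.lock();

  // Re-check: someone (e.g. a GC) may have changed the state meanwhile.
  if (r.state == FillState::Filling && r.filler == std::this_thread::get_id())
    r.state = FillState::Filled;
}

int main() {
  Region r;
  fill_region(r);
  std::printf("filled: %s\n", r.state == FillState::Filled ? "yes" : "no");
  return 0;
}
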
+ + switch (to_fill->zero_fill_state()) { + case HeapRegion::Allocated: + to_fill = NULL; + break; + + case HeapRegion::NotZeroFilled: + to_fill->set_zero_fill_in_progress(thr_self); + + ZF_mon->unlock(); + _sts.join(); + processHeapRegion(to_fill); + _sts.leave(); + ZF_mon->lock_without_safepoint_check(); + + if (to_fill->zero_fill_state() == HeapRegion::ZeroFilling + && to_fill->zero_filler() == thr_self) { + to_fill->set_zero_fill_complete(); + (void)g1->put_free_region_on_list_locked(to_fill); + } + break; + + case HeapRegion::ZeroFilled: + (void)g1->put_free_region_on_list_locked(to_fill); + break; + + case HeapRegion::ZeroFilling: + ShouldNotReachHere(); + break; + } + } + _vtime_accum = (os::elapsedVTime() - _vtime_start); + _sts.join(); + + _co_tracker.update(); + } + _co_tracker.update(false); + _sts.leave(); + + assert(_should_terminate, "just checking"); + terminate(); +} + +bool ConcurrentZFThread::offer_yield() { + if (_sts.should_yield()) { + _sts.yield("Concurrent ZF"); + return true; + } else { + return false; + } +} + +void ConcurrentZFThread::stop() { + // it is ok to take late safepoints here, if needed + MutexLockerEx mu(Terminator_lock); + _should_terminate = true; + while (!_has_terminated) { + Terminator_lock->wait(); + } +} + +void ConcurrentZFThread::print() { + gclog_or_tty->print("\"Concurrent ZF Thread\" "); + Thread::print(); + gclog_or_tty->cr(); +} + + +double ConcurrentZFThread::_vtime_accum; + +void ConcurrentZFThread::print_summary_info() { + gclog_or_tty->print("\nConcurrent Zero-Filling:\n"); + gclog_or_tty->print(" Filled %d regions, used %5.2fs.\n", + _regions_filled, + vtime_accum()); + gclog_or_tty->print(" Of %d region allocs, %d (%5.2f%%) required sync ZF,\n", + _region_allocs, _sync_zfs, + (_region_allocs > 0 ? + (float)_sync_zfs/(float)_region_allocs*100.0 : + 0.0)); + gclog_or_tty->print(" and %d (%5.2f%%) required a ZF wait.\n", + _zf_waits, + (_region_allocs > 0 ? + (float)_zf_waits/(float)_region_allocs*100.0 : + 0.0)); + +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentZFThread.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,85 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// The Concurrent ZF Thread. Performs concurrent zero-filling. 
+ +class ConcurrentZFThread: public ConcurrentGCThread { + friend class VMStructs; + friend class ZeroFillRegionClosure; + + private: + + // Zero fill the heap region. + void processHeapRegion(HeapRegion* r); + + // Stats + // Allocation (protected by heap lock). + static int _region_allocs; // Number of regions allocated + static int _sync_zfs; // Synchronous zero-fills + + static int _zf_waits; // Wait for conc zero-fill completion. + + // Number of regions CFZ thread fills. + static int _regions_filled; + + COTracker _co_tracker; + + double _vtime_start; // Initial virtual time. + + // These are static because the "print_summary_info" method is, and + // it currently assumes there is only one ZF thread. We'll change when + // we need to. + static double _vtime_accum; // Initial virtual time. + static double vtime_accum() { return _vtime_accum; } + + // Offer yield for GC. Returns true if yield occurred. + bool offer_yield(); + + public: + // Constructor + ConcurrentZFThread(); + + // Main loop. + virtual void run(); + + // Printing + void print(); + + // Waits until "r" has been zero-filled. Requires caller to hold the + // ZF_mon. + static void wait_for_ZF_completed(HeapRegion* r); + + // Get or clear the current unclean region. Should be done + // while holding the ZF_needed_mon lock. + + // shutdown + static void stop(); + + // Stats + static void note_region_alloc() {_region_allocs++; } + static void note_sync_zfs() { _sync_zfs++; } + static void note_zf_wait() { _zf_waits++; } + static void note_region_filled() { _regions_filled++; } + + static void print_summary_info(); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,307 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_dirtyCardQueue.cpp.incl" + +bool DirtyCardQueue::apply_closure(CardTableEntryClosure* cl, + bool consume, + size_t worker_i) { + bool res = true; + if (_buf != NULL) { + res = apply_closure_to_buffer(cl, _buf, _index, _sz, + consume, + (int) worker_i); + if (res && consume) _index = _sz; + } + return res; +} + +bool DirtyCardQueue::apply_closure_to_buffer(CardTableEntryClosure* cl, + void** buf, + size_t index, size_t sz, + bool consume, + int worker_i) { + if (cl == NULL) return true; + for (size_t i = index; i < sz; i += oopSize) { + int ind = byte_index_to_index((int)i); + jbyte* card_ptr = (jbyte*)buf[ind]; + if (card_ptr != NULL) { + // Set the entry to null, so we don't do it again (via the test + // above) if we reconsider this buffer. + if (consume) buf[ind] = NULL; + if (!cl->do_card_ptr(card_ptr, worker_i)) return false; + } + } + return true; +} + +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list +#endif // _MSC_VER + +DirtyCardQueueSet::DirtyCardQueueSet() : + PtrQueueSet(true /*notify_when_complete*/), + _closure(NULL), + _shared_dirty_card_queue(this, true /*perm*/), + _free_ids(NULL), + _processed_buffers_mut(0), _processed_buffers_rs_thread(0) +{ + _all_active = true; +} + +size_t DirtyCardQueueSet::num_par_ids() { + return MAX2(ParallelGCThreads, (size_t)2); +} + + +void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, + int max_completed_queue, + Mutex* lock) { + PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue); + set_buffer_size(DCQBarrierQueueBufferSize); + set_process_completed_threshold(DCQBarrierProcessCompletedThreshold); + + _shared_dirty_card_queue.set_lock(lock); + _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon); + bool b = _free_ids->claim_perm_id(0); + guarantee(b, "Must reserve id zero for concurrent refinement thread."); +} + +void DirtyCardQueueSet::handle_zero_index_for_thread(JavaThread* t) { + t->dirty_card_queue().handle_zero_index(); +} + +void DirtyCardQueueSet::set_closure(CardTableEntryClosure* closure) { + _closure = closure; +} + +void DirtyCardQueueSet::iterate_closure_all_threads(bool consume, + size_t worker_i) { + assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); + for(JavaThread* t = Threads::first(); t; t = t->next()) { + bool b = t->dirty_card_queue().apply_closure(_closure, consume); + guarantee(b, "Should not be interrupted."); + } + bool b = shared_dirty_card_queue()->apply_closure(_closure, + consume, + worker_i); + guarantee(b, "Should not be interrupted."); +} + +bool DirtyCardQueueSet::mut_process_buffer(void** buf) { + + // Used to determine if we had already claimed a par_id + // before entering this method. + bool already_claimed = false; + + // We grab the current JavaThread. + JavaThread* thread = JavaThread::current(); + + // We get the the number of any par_id that this thread + // might have already claimed. + int worker_i = thread->get_claimed_par_id(); + + // If worker_i is not -1 then the thread has already claimed + // a par_id. 
We make note of it using the already_claimed value + if (worker_i != -1) { + already_claimed = true; + } else { + + // Otherwise we need to claim a par id + worker_i = _free_ids->claim_par_id(); + + // And store the par_id value in the thread + thread->set_claimed_par_id(worker_i); + } + + bool b = false; + if (worker_i != -1) { + b = DirtyCardQueue::apply_closure_to_buffer(_closure, buf, 0, + _sz, true, worker_i); + if (b) Atomic::inc(&_processed_buffers_mut); + + // If we had not claimed an id before entering the method + // then we must release the id. + if (!already_claimed) { + + // we release the id + _free_ids->release_par_id(worker_i); + + // and set the claimed_id in the thread to -1 + thread->set_claimed_par_id(-1); + } + } + return b; +} + +DirtyCardQueueSet::CompletedBufferNode* +DirtyCardQueueSet::get_completed_buffer_lock(int stop_at) { + CompletedBufferNode* nd = NULL; + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + + if ((int)_n_completed_buffers <= stop_at) { + _process_completed = false; + return NULL; + } + + if (_completed_buffers_head != NULL) { + nd = _completed_buffers_head; + _completed_buffers_head = nd->next; + if (_completed_buffers_head == NULL) + _completed_buffers_tail = NULL; + _n_completed_buffers--; + } + debug_only(assert_completed_buffer_list_len_correct_locked()); + return nd; +} + +// We only do this in contexts where there is no concurrent enqueueing. +DirtyCardQueueSet::CompletedBufferNode* +DirtyCardQueueSet::get_completed_buffer_CAS() { + CompletedBufferNode* nd = _completed_buffers_head; + + while (nd != NULL) { + CompletedBufferNode* next = nd->next; + CompletedBufferNode* result = + (CompletedBufferNode*)Atomic::cmpxchg_ptr(next, + &_completed_buffers_head, + nd); + if (result == nd) { + return result; + } else { + nd = _completed_buffers_head; + } + } + assert(_completed_buffers_head == NULL, "Loop post"); + _completed_buffers_tail = NULL; + return NULL; +} + +bool DirtyCardQueueSet:: +apply_closure_to_completed_buffer_helper(int worker_i, + CompletedBufferNode* nd) { + if (nd != NULL) { + bool b = + DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, + nd->index, _sz, + true, worker_i); + void** buf = nd->buf; + delete nd; + if (b) { + deallocate_buffer(buf); + return true; // In normal case, go on to next buffer. 
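mut_process_buffer() above claims a parallel worker id only when the running thread does not already hold one, and releases it only in the frame that actually claimed it, so re-entrant calls reuse the outer claim. A sketch of that discipline in isolation; HypotheticalFreeIdSet and the thread-local slot are assumptions standing in for FreeIdSet and the thread's claimed par_id, not real APIs.

#include <cstddef>

struct HypotheticalFreeIdSet {
  virtual ~HypotheticalFreeIdSet() {}
  virtual int  claim_par_id() = 0;           // returns an id, or -1 if none free
  virtual void release_par_id(int id) = 0;
};

thread_local int claimed_par_id = -1;        // per-thread claimed id, -1 = none

template <typename Work>
bool with_par_id(HypotheticalFreeIdSet& ids, Work work) {
  bool already_claimed = (claimed_par_id != -1);
  int worker_i = claimed_par_id;
  if (!already_claimed) {
    worker_i = ids.claim_par_id();           // claim a fresh id for this call
    claimed_par_id = worker_i;
  }
  bool processed = false;
  if (worker_i != -1) {
    processed = work(worker_i);              // e.g. apply the card closure
    if (!already_claimed) {                  // only the claiming frame releases
      ids.release_par_id(worker_i);
      claimed_par_id = -1;
    }
  }
  return processed;
}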
+ } else { + enqueue_complete_buffer(buf, nd->index, true); + return false; + } + } else { + return false; + } +} + +bool DirtyCardQueueSet::apply_closure_to_completed_buffer(int worker_i, + int stop_at, + bool with_CAS) +{ + CompletedBufferNode* nd = NULL; + if (with_CAS) { + guarantee(stop_at == 0, "Precondition"); + nd = get_completed_buffer_CAS(); + } else { + nd = get_completed_buffer_lock(stop_at); + } + bool res = apply_closure_to_completed_buffer_helper(worker_i, nd); + if (res) _processed_buffers_rs_thread++; + return res; +} + +void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() { + CompletedBufferNode* nd = _completed_buffers_head; + while (nd != NULL) { + bool b = + DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, 0, _sz, + false); + guarantee(b, "Should not stop early."); + nd = nd->next; + } +} + +void DirtyCardQueueSet::abandon_logs() { + assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); + CompletedBufferNode* buffers_to_delete = NULL; + { + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + while (_completed_buffers_head != NULL) { + CompletedBufferNode* nd = _completed_buffers_head; + _completed_buffers_head = nd->next; + nd->next = buffers_to_delete; + buffers_to_delete = nd; + } + _n_completed_buffers = 0; + _completed_buffers_tail = NULL; + debug_only(assert_completed_buffer_list_len_correct_locked()); + } + while (buffers_to_delete != NULL) { + CompletedBufferNode* nd = buffers_to_delete; + buffers_to_delete = nd->next; + deallocate_buffer(nd->buf); + delete nd; + } + // Since abandon is done only at safepoints, we can safely manipulate + // these queues. + for (JavaThread* t = Threads::first(); t; t = t->next()) { + t->dirty_card_queue().reset(); + } + shared_dirty_card_queue()->reset(); +} + + +void DirtyCardQueueSet::concatenate_logs() { + // Iterate over all the threads, if we find a partial log add it to + // the global list of logs. Temporarily turn off the limit on the number + // of outstanding buffers. + int save_max_completed_queue = _max_completed_queue; + _max_completed_queue = max_jint; + assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); + for (JavaThread* t = Threads::first(); t; t = t->next()) { + DirtyCardQueue& dcq = t->dirty_card_queue(); + if (dcq.size() != 0) { + void **buf = t->dirty_card_queue().get_buf(); + // We must NULL out the unused entries, then enqueue. + for (size_t i = 0; i < t->dirty_card_queue().get_index(); i += oopSize) { + buf[PtrQueue::byte_index_to_index((int)i)] = NULL; + } + enqueue_complete_buffer(dcq.get_buf(), dcq.get_index()); + dcq.reinitialize(); + } + } + if (_shared_dirty_card_queue.size() != 0) { + enqueue_complete_buffer(_shared_dirty_card_queue.get_buf(), + _shared_dirty_card_queue.get_index()); + _shared_dirty_card_queue.reinitialize(); + } + // Restore the completed buffer queue limit. + _max_completed_queue = save_max_completed_queue; +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,152 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class FreeIdSet; + +// A closure class for processing card table entries. Note that we don't +// require these closure objects to be stack-allocated. +class CardTableEntryClosure: public CHeapObj { +public: + // Process the card whose card table entry is "card_ptr". If returns + // "false", terminate the iteration early. + virtual bool do_card_ptr(jbyte* card_ptr, int worker_i = 0) = 0; +}; + +// A ptrQueue whose elements are "oops", pointers to object heads. +class DirtyCardQueue: public PtrQueue { +public: + DirtyCardQueue(PtrQueueSet* qset_, bool perm = false) : + PtrQueue(qset_, perm) + { + // Dirty card queues are always active. + _active = true; + } + // Apply the closure to all elements, and reset the index to make the + // buffer empty. If a closure application returns "false", return + // "false" immediately, halting the iteration. If "consume" is true, + // deletes processed entries from logs. + bool apply_closure(CardTableEntryClosure* cl, + bool consume = true, + size_t worker_i = 0); + + // Apply the closure to all elements of "buf", down to "index" + // (inclusive.) If returns "false", then a closure application returned + // "false", and we return immediately. If "consume" is true, entries are + // set to NULL as they are processed, so they will not be processed again + // later. + static bool apply_closure_to_buffer(CardTableEntryClosure* cl, + void** buf, size_t index, size_t sz, + bool consume = true, + int worker_i = 0); + void **get_buf() { return _buf;} + void set_buf(void **buf) {_buf = buf;} + size_t get_index() { return _index;} + void reinitialize() { _buf = 0; _sz = 0; _index = 0;} +}; + + + +class DirtyCardQueueSet: public PtrQueueSet { + CardTableEntryClosure* _closure; + + DirtyCardQueue _shared_dirty_card_queue; + + // Override. + bool mut_process_buffer(void** buf); + + // Protected by the _cbl_mon. + FreeIdSet* _free_ids; + + // The number of completed buffers processed by mutator and rs thread, + // respectively. + jint _processed_buffers_mut; + jint _processed_buffers_rs_thread; + +public: + DirtyCardQueueSet(); + + void initialize(Monitor* cbl_mon, Mutex* fl_lock, + int max_completed_queue = 0, + Mutex* lock = NULL); + + // The number of parallel ids that can be claimed to allow collector or + // mutator threads to do card-processing work. + static size_t num_par_ids(); + + static void handle_zero_index_for_thread(JavaThread* t); + + // Register "blk" as "the closure" for all queues. Only one such closure + // is allowed. The "apply_closure_to_completed_buffer" method will apply + // this closure to a completed buffer, and "iterate_closure_all_threads" + // applies it to partially-filled buffers (the latter should only be done + // with the world stopped). 
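The closure protocol described in the comments above works as follows: one registered closure receives every logged card pointer through do_card_ptr(), a false return stops the walk, and consumed slots are nulled so a partially processed buffer can safely be re-offered later. A simplified, self-contained version of that protocol (plain element indexing rather than PtrQueue's byte indexing; the types below are stand-ins, not the real CardTableEntryClosure or DirtyCardQueue):

#include <cstddef>

typedef signed char jbyte_t;                 // models jbyte

struct CardClosure {                         // models CardTableEntryClosure
  virtual ~CardClosure() {}
  // Return false to terminate the buffer walk early.
  virtual bool do_card_ptr(jbyte_t* card_ptr, int worker_i) = 0;
};

struct CountDirtyCards : public CardClosure {
  size_t count;
  CountDirtyCards() : count(0) {}
  virtual bool do_card_ptr(jbyte_t* /*card_ptr*/, int /*worker_i*/) {
    ++count;           // a real closure would refine remembered sets here
    return true;       // keep iterating
  }
};

// Mirrors apply_closure_to_buffer(): consumed entries are nulled so the same
// buffer can be reconsidered later without processing a card twice.
bool apply_to_buffer(CardClosure* cl, jbyte_t** buf, size_t index, size_t sz,
                     bool consume, int worker_i) {
  if (cl == NULL) return true;
  for (size_t i = index; i < sz; ++i) {
    jbyte_t* card = buf[i];
    if (card != NULL) {
      if (consume) buf[i] = NULL;
      if (!cl->do_card_ptr(card, worker_i)) return false;
    }
  }
  return true;
}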
+ void set_closure(CardTableEntryClosure* closure); + + // If there is a registered closure for buffers, apply it to all entries + // in all currently-active buffers. This should only be applied at a + // safepoint. (Currently must not be called in parallel; this should + // change in the future.) If "consume" is true, processed entries are + // discarded. + void iterate_closure_all_threads(bool consume = true, + size_t worker_i = 0); + + // If there exists some completed buffer, pop it, then apply the + // registered closure to all its elements, nulling out those elements + // processed. If all elements are processed, returns "true". If no + // completed buffers exist, returns false. If a completed buffer exists, + // but is only partially completed before a "yield" happens, the + // partially completed buffer (with its processed elements set to NULL) + // is returned to the completed buffer set, and this call returns false. + bool apply_closure_to_completed_buffer(int worker_i = 0, + int stop_at = 0, + bool with_CAS = false); + bool apply_closure_to_completed_buffer_helper(int worker_i, + CompletedBufferNode* nd); + + CompletedBufferNode* get_completed_buffer_CAS(); + CompletedBufferNode* get_completed_buffer_lock(int stop_at); + // Applies the current closure to all completed buffers, + // non-consumptively. + void apply_closure_to_all_completed_buffers(); + + DirtyCardQueue* shared_dirty_card_queue() { + return &_shared_dirty_card_queue; + } + + // If a full collection is happening, reset partial logs, and ignore + // completed ones: the full collection will make them all irrelevant. + void abandon_logs(); + + // If any threads have partial logs, add them to the global list of logs. + void concatenate_logs(); + void clear_n_completed_buffers() { _n_completed_buffers = 0;} + + jint processed_buffers_mut() { + return _processed_buffers_mut; + } + jint processed_buffers_rs_thread() { + return _processed_buffers_rs_thread; + } + +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,624 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1BlockOffsetTable.cpp.incl" + +////////////////////////////////////////////////////////////////////// +// G1BlockOffsetSharedArray +////////////////////////////////////////////////////////////////////// + +G1BlockOffsetSharedArray::G1BlockOffsetSharedArray(MemRegion reserved, + size_t init_word_size) : + _reserved(reserved), _end(NULL) +{ + size_t size = compute_size(reserved.word_size()); + ReservedSpace rs(ReservedSpace::allocation_align_size_up(size)); + if (!rs.is_reserved()) { + vm_exit_during_initialization("Could not reserve enough space for heap offset array"); + } + if (!_vs.initialize(rs, 0)) { + vm_exit_during_initialization("Could not reserve enough space for heap offset array"); + } + _offset_array = (u_char*)_vs.low_boundary(); + resize(init_word_size); + if (TraceBlockOffsetTable) { + gclog_or_tty->print_cr("G1BlockOffsetSharedArray::G1BlockOffsetSharedArray: "); + gclog_or_tty->print_cr(" " + " rs.base(): " INTPTR_FORMAT + " rs.size(): " INTPTR_FORMAT + " rs end(): " INTPTR_FORMAT, + rs.base(), rs.size(), rs.base() + rs.size()); + gclog_or_tty->print_cr(" " + " _vs.low_boundary(): " INTPTR_FORMAT + " _vs.high_boundary(): " INTPTR_FORMAT, + _vs.low_boundary(), + _vs.high_boundary()); + } +} + +void G1BlockOffsetSharedArray::resize(size_t new_word_size) { + assert(new_word_size <= _reserved.word_size(), "Resize larger than reserved"); + size_t new_size = compute_size(new_word_size); + size_t old_size = _vs.committed_size(); + size_t delta; + char* high = _vs.high(); + _end = _reserved.start() + new_word_size; + if (new_size > old_size) { + delta = ReservedSpace::page_align_size_up(new_size - old_size); + assert(delta > 0, "just checking"); + if (!_vs.expand_by(delta)) { + // Do better than this for Merlin + vm_exit_out_of_memory(delta, "offset table expansion"); + } + assert(_vs.high() == high + delta, "invalid expansion"); + // Initialization of the contents is left to the + // G1BlockOffsetArray that uses it. 
+ } else { + delta = ReservedSpace::page_align_size_down(old_size - new_size); + if (delta == 0) return; + _vs.shrink_by(delta); + assert(_vs.high() == high - delta, "invalid expansion"); + } +} + +bool G1BlockOffsetSharedArray::is_card_boundary(HeapWord* p) const { + assert(p >= _reserved.start(), "just checking"); + size_t delta = pointer_delta(p, _reserved.start()); + return (delta & right_n_bits(LogN_words)) == (size_t)NoBits; +} + + +////////////////////////////////////////////////////////////////////// +// G1BlockOffsetArray +////////////////////////////////////////////////////////////////////// + +G1BlockOffsetArray::G1BlockOffsetArray(G1BlockOffsetSharedArray* array, + MemRegion mr, bool init_to_zero) : + G1BlockOffsetTable(mr.start(), mr.end()), + _unallocated_block(_bottom), + _array(array), _csp(NULL), + _init_to_zero(init_to_zero) { + assert(_bottom <= _end, "arguments out of order"); + if (!_init_to_zero) { + // initialize cards to point back to mr.start() + set_remainder_to_point_to_start(mr.start() + N_words, mr.end()); + _array->set_offset_array(0, 0); // set first card to 0 + } +} + +void G1BlockOffsetArray::set_space(Space* sp) { + _sp = sp; + _csp = sp->toContiguousSpace(); +} + +// The arguments follow the normal convention of denoting +// a right-open interval: [start, end) +void +G1BlockOffsetArray:: set_remainder_to_point_to_start(HeapWord* start, HeapWord* end) { + + if (start >= end) { + // The start address is equal to the end address (or to + // the right of the end address) so there are not cards + // that need to be updated.. + return; + } + + // Write the backskip value for each region. + // + // offset + // card 2nd 3rd + // | +- 1st | | + // v v v v + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+- + // |x|0|0|0|0|0|0|0|1|1|1|1|1|1| ... |1|1|1|1|2|2|2|2|2|2| ... + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+- + // 11 19 75 + // 12 + // + // offset card is the card that points to the start of an object + // x - offset value of offset card + // 1st - start of first logarithmic region + // 0 corresponds to logarithmic value N_words + 0 and 2**(3 * 0) = 1 + // 2nd - start of second logarithmic region + // 1 corresponds to logarithmic value N_words + 1 and 2**(3 * 1) = 8 + // 3rd - start of third logarithmic region + // 2 corresponds to logarithmic value N_words + 2 and 2**(3 * 2) = 64 + // + // integer below the block offset entry is an example of + // the index of the entry + // + // Given an address, + // Find the index for the address + // Find the block offset table entry + // Convert the entry to a back slide + // (e.g., with today's, offset = 0x81 => + // back slip = 2**(3*(0x81 - N_words)) = 2**3) = 8 + // Move back N (e.g., 8) entries and repeat with the + // value of the new entry + // + size_t start_card = _array->index_for(start); + size_t end_card = _array->index_for(end-1); + assert(start ==_array->address_for_index(start_card), "Precondition"); + assert(end ==_array->address_for_index(end_card)+N_words, "Precondition"); + set_remainder_to_point_to_start_incl(start_card, end_card); // closed interval +} + +// Unlike the normal convention in this code, the argument here denotes +// a closed, inclusive interval: [start_card, end_card], cf set_remainder_to_point_to_start() +// above. 
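The diagram above encodes, for every card after the one holding a real offset, how far back a lookup must skip: entries below N_words are direct word offsets, and an entry of N_words + i means "skip back 2^(3*i) cards and look again". A small decoding model of that scheme; the constants (N_words = 64, base 8) follow the diagram and are assumptions rather than the shared BlockOffsetArray constants.

#include <cstddef>

const size_t kNWords = 64;                   // assumed N_words (512-byte cards)

// entry_to_cards_back analogue: valid only for entries >= kNWords.
size_t cards_back(unsigned char entry) {
  size_t i = entry - kNWords;                // which logarithmic region
  return size_t(1) << (3 * i);               // 8^i cards per skip
}

// Follow the chain of back-skips until a card holding a direct offset
// (entry < kNWords) is reached; that card records the block start.
size_t find_offset_card(const unsigned char* offsets, size_t card) {
  while (offsets[card] >= kNWords) {
    card -= cards_back(offsets[card]);
  }
  return card;
}

Because the skip distance grows geometrically with distance from the offset card, even a block spanning a great many cards is resolved in a handful of probes.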
+void +G1BlockOffsetArray::set_remainder_to_point_to_start_incl(size_t start_card, size_t end_card) { + if (start_card > end_card) { + return; + } + assert(start_card > _array->index_for(_bottom), "Cannot be first card"); + assert(_array->offset_array(start_card-1) <= N_words, + "Offset card has an unexpected value"); + size_t start_card_for_region = start_card; + u_char offset = max_jubyte; + for (int i = 0; i < BlockOffsetArray::N_powers; i++) { + // -1 so that the the card with the actual offset is counted. Another -1 + // so that the reach ends in this region and not at the start + // of the next. + size_t reach = start_card - 1 + (BlockOffsetArray::power_to_cards_back(i+1) - 1); + offset = N_words + i; + if (reach >= end_card) { + _array->set_offset_array(start_card_for_region, end_card, offset); + start_card_for_region = reach + 1; + break; + } + _array->set_offset_array(start_card_for_region, reach, offset); + start_card_for_region = reach + 1; + } + assert(start_card_for_region > end_card, "Sanity check"); + DEBUG_ONLY(check_all_cards(start_card, end_card);) +} + +// The block [blk_start, blk_end) has been allocated; +// adjust the block offset table to represent this information; +// right-open interval: [blk_start, blk_end) +void +G1BlockOffsetArray::alloc_block(HeapWord* blk_start, HeapWord* blk_end) { + mark_block(blk_start, blk_end); + allocated(blk_start, blk_end); +} + +// Adjust BOT to show that a previously whole block has been split +// into two. +void G1BlockOffsetArray::split_block(HeapWord* blk, size_t blk_size, + size_t left_blk_size) { + // Verify that the BOT shows [blk, blk + blk_size) to be one block. + verify_single_block(blk, blk_size); + // Update the BOT to indicate that [blk + left_blk_size, blk + blk_size) + // is one single block. + mark_block(blk + left_blk_size, blk + blk_size); +} + + +// Action_mark - update the BOT for the block [blk_start, blk_end). +// Current typical use is for splitting a block. +// Action_single - udpate the BOT for an allocation. +// Action_verify - BOT verification. +void G1BlockOffsetArray::do_block_internal(HeapWord* blk_start, + HeapWord* blk_end, + Action action) { + assert(Universe::heap()->is_in_reserved(blk_start), + "reference must be into the heap"); + assert(Universe::heap()->is_in_reserved(blk_end-1), + "limit must be within the heap"); + // This is optimized to make the test fast, assuming we only rarely + // cross boundaries. 
+ uintptr_t end_ui = (uintptr_t)(blk_end - 1); + uintptr_t start_ui = (uintptr_t)blk_start; + // Calculate the last card boundary preceding end of blk + intptr_t boundary_before_end = (intptr_t)end_ui; + clear_bits(boundary_before_end, right_n_bits(LogN)); + if (start_ui <= (uintptr_t)boundary_before_end) { + // blk starts at or crosses a boundary + // Calculate index of card on which blk begins + size_t start_index = _array->index_for(blk_start); + // Index of card on which blk ends + size_t end_index = _array->index_for(blk_end - 1); + // Start address of card on which blk begins + HeapWord* boundary = _array->address_for_index(start_index); + assert(boundary <= blk_start, "blk should start at or after boundary"); + if (blk_start != boundary) { + // blk starts strictly after boundary + // adjust card boundary and start_index forward to next card + boundary += N_words; + start_index++; + } + assert(start_index <= end_index, "monotonicity of index_for()"); + assert(boundary <= (HeapWord*)boundary_before_end, "tautology"); + switch (action) { + case Action_mark: { + if (init_to_zero()) { + _array->set_offset_array(start_index, boundary, blk_start); + break; + } // Else fall through to the next case + } + case Action_single: { + _array->set_offset_array(start_index, boundary, blk_start); + // We have finished marking the "offset card". We need to now + // mark the subsequent cards that this blk spans. + if (start_index < end_index) { + HeapWord* rem_st = _array->address_for_index(start_index) + N_words; + HeapWord* rem_end = _array->address_for_index(end_index) + N_words; + set_remainder_to_point_to_start(rem_st, rem_end); + } + break; + } + case Action_check: { + _array->check_offset_array(start_index, boundary, blk_start); + // We have finished checking the "offset card". We need to now + // check the subsequent cards that this blk spans. + check_all_cards(start_index + 1, end_index); + break; + } + default: + ShouldNotReachHere(); + } + } +} + +// The card-interval [start_card, end_card] is a closed interval; this +// is an expensive check -- use with care and only under protection of +// suitable flag. +void G1BlockOffsetArray::check_all_cards(size_t start_card, size_t end_card) const { + + if (end_card < start_card) { + return; + } + guarantee(_array->offset_array(start_card) == N_words, "Wrong value in second card"); + for (size_t c = start_card + 1; c <= end_card; c++ /* yeah! */) { + u_char entry = _array->offset_array(c); + if (c - start_card > BlockOffsetArray::power_to_cards_back(1)) { + guarantee(entry > N_words, "Should be in logarithmic region"); + } + size_t backskip = BlockOffsetArray::entry_to_cards_back(entry); + size_t landing_card = c - backskip; + guarantee(landing_card >= (start_card - 1), "Inv"); + if (landing_card >= start_card) { + guarantee(_array->offset_array(landing_card) <= entry, "monotonicity"); + } else { + guarantee(landing_card == start_card - 1, "Tautology"); + guarantee(_array->offset_array(landing_card) <= N_words, "Offset value"); + } + } +} + +// The range [blk_start, blk_end) represents a single contiguous block +// of storage; modify the block offset table to represent this +// information; Right-open interval: [blk_start, blk_end) +// NOTE: this method does _not_ adjust _unallocated_block. 
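The fast-path test at the top of do_block_internal() above aligns the block's last byte down to a card boundary and compares it with the block start: only a block that starts at or crosses a boundary needs any table update. The same test in isolation, under an assumed LogN of 9 (512-byte cards, matching the constant declared later in G1BlockOffsetSharedArray):

#include <stdint.h>

const uintptr_t kLogN = 9;                            // assumed card shift
const uintptr_t kCardMask = ((uintptr_t)1 << kLogN) - 1;

bool block_needs_bot_update(const void* blk_start, const void* blk_end) {
  uintptr_t start_ui = (uintptr_t)blk_start;
  uintptr_t end_ui   = (uintptr_t)blk_end - 1;        // last byte of the block
  uintptr_t boundary = end_ui & ~kCardMask;           // last card boundary <= end
  return start_ui <= boundary;                        // starts at or crosses it
}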
+void +G1BlockOffsetArray::single_block(HeapWord* blk_start, HeapWord* blk_end) { + do_block_internal(blk_start, blk_end, Action_single); +} + +// Mark the BOT such that if [blk_start, blk_end) straddles a card +// boundary, the card following the first such boundary is marked +// with the appropriate offset. +// NOTE: this method does _not_ adjust _unallocated_block or +// any cards subsequent to the first one. +void +G1BlockOffsetArray::mark_block(HeapWord* blk_start, HeapWord* blk_end) { + do_block_internal(blk_start, blk_end, Action_mark); +} + +void G1BlockOffsetArray::join_blocks(HeapWord* blk1, HeapWord* blk2) { + HeapWord* blk1_start = Universe::heap()->block_start(blk1); + HeapWord* blk2_start = Universe::heap()->block_start(blk2); + assert(blk1 == blk1_start && blk2 == blk2_start, + "Must be block starts."); + assert(blk1 + _sp->block_size(blk1) == blk2, "Must be contiguous."); + size_t blk1_start_index = _array->index_for(blk1); + size_t blk2_start_index = _array->index_for(blk2); + assert(blk1_start_index <= blk2_start_index, "sanity"); + HeapWord* blk2_card_start = _array->address_for_index(blk2_start_index); + if (blk2 == blk2_card_start) { + // blk2 starts a card. Does blk1 start on the prevous card, or futher + // back? + assert(blk1_start_index < blk2_start_index, "must be lower card."); + if (blk1_start_index + 1 == blk2_start_index) { + // previous card; new value for blk2 card is size of blk1. + _array->set_offset_array(blk2_start_index, (u_char) _sp->block_size(blk1)); + } else { + // Earlier card; go back a card. + _array->set_offset_array(blk2_start_index, N_words); + } + } else { + // blk2 does not start a card. Does it cross a card? If not, nothing + // to do. + size_t blk2_end_index = + _array->index_for(blk2 + _sp->block_size(blk2) - 1); + assert(blk2_end_index >= blk2_start_index, "sanity"); + if (blk2_end_index > blk2_start_index) { + // Yes, it crosses a card. The value for the next card must change. + if (blk1_start_index + 1 == blk2_start_index) { + // previous card; new value for second blk2 card is size of blk1. + _array->set_offset_array(blk2_start_index + 1, + (u_char) _sp->block_size(blk1)); + } else { + // Earlier card; go back a card. + _array->set_offset_array(blk2_start_index + 1, N_words); + } + } + } +} + +HeapWord* G1BlockOffsetArray::block_start_unsafe(const void* addr) { + assert(_bottom <= addr && addr < _end, + "addr must be covered by this Array"); + // Must read this exactly once because it can be modified by parallel + // allocation. + HeapWord* ub = _unallocated_block; + if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) { + assert(ub < _end, "tautology (see above)"); + return ub; + } + // Otherwise, find the block start using the table. + HeapWord* q = block_at_or_preceding(addr, false, 0); + return forward_to_block_containing_addr(q, addr); +} + +// This duplicates a little code from the above: unavoidable. +HeapWord* +G1BlockOffsetArray::block_start_unsafe_const(const void* addr) const { + assert(_bottom <= addr && addr < _end, + "addr must be covered by this Array"); + // Must read this exactly once because it can be modified by parallel + // allocation. + HeapWord* ub = _unallocated_block; + if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) { + assert(ub < _end, "tautology (see above)"); + return ub; + } + // Otherwise, find the block start using the table. 
+ HeapWord* q = block_at_or_preceding(addr, false, 0); + HeapWord* n = q + _sp->block_size(q); + return forward_to_block_containing_addr_const(q, n, addr); +} + + +HeapWord* +G1BlockOffsetArray::forward_to_block_containing_addr_slow(HeapWord* q, + HeapWord* n, + const void* addr) { + // We're not in the normal case. We need to handle an important subcase + // here: LAB allocation. An allocation previously recorded in the + // offset table was actually a lab allocation, and was divided into + // several objects subsequently. Fix this situation as we answer the + // query, by updating entries as we cross them. + size_t next_index = _array->index_for(n) + 1; + HeapWord* next_boundary = _array->address_for_index(next_index); + if (csp() != NULL) { + if (addr >= csp()->top()) return csp()->top(); + while (next_boundary < addr) { + while (n <= next_boundary) { + q = n; + oop obj = oop(q); + if (obj->klass() == NULL) return q; + n += obj->size(); + } + assert(q <= next_boundary && n > next_boundary, "Consequence of loop"); + // [q, n) is the block that crosses the boundary. + alloc_block_work2(&next_boundary, &next_index, q, n); + } + } else { + while (next_boundary < addr) { + while (n <= next_boundary) { + q = n; + oop obj = oop(q); + if (obj->klass() == NULL) return q; + n += _sp->block_size(q); + } + assert(q <= next_boundary && n > next_boundary, "Consequence of loop"); + // [q, n) is the block that crosses the boundary. + alloc_block_work2(&next_boundary, &next_index, q, n); + } + } + return forward_to_block_containing_addr_const(q, n, addr); +} + +HeapWord* G1BlockOffsetArray::block_start_careful(const void* addr) const { + assert(_array->offset_array(0) == 0, "objects can't cross covered areas"); + + assert(_bottom <= addr && addr < _end, + "addr must be covered by this Array"); + // Must read this exactly once because it can be modified by parallel + // allocation. + HeapWord* ub = _unallocated_block; + if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) { + assert(ub < _end, "tautology (see above)"); + return ub; + } + + // Otherwise, find the block start using the table, but taking + // care (cf block_start_unsafe() above) not to parse any objects/blocks + // on the cards themsleves. + size_t index = _array->index_for(addr); + assert(_array->address_for_index(index) == addr, + "arg should be start of card"); + + HeapWord* q = (HeapWord*)addr; + uint offset; + do { + offset = _array->offset_array(index--); + q -= offset; + } while (offset == N_words); + assert(q <= addr, "block start should be to left of arg"); + return q; +} + +// Note that the committed size of the covered space may have changed, +// so the table size might also wish to change. 
+void G1BlockOffsetArray::resize(size_t new_word_size) { + HeapWord* new_end = _bottom + new_word_size; + if (_end < new_end && !init_to_zero()) { + // verify that the old and new boundaries are also card boundaries + assert(_array->is_card_boundary(_end), + "_end not a card boundary"); + assert(_array->is_card_boundary(new_end), + "new _end would not be a card boundary"); + // set all the newly added cards + _array->set_offset_array(_end, new_end, N_words); + } + _end = new_end; // update _end +} + +void G1BlockOffsetArray::set_region(MemRegion mr) { + _bottom = mr.start(); + _end = mr.end(); +} + +// +// threshold_ +// | _index_ +// v v +// +-------+-------+-------+-------+-------+ +// | i-1 | i | i+1 | i+2 | i+3 | +// +-------+-------+-------+-------+-------+ +// ( ^ ] +// block-start +// +void G1BlockOffsetArray::alloc_block_work2(HeapWord** threshold_, size_t* index_, + HeapWord* blk_start, HeapWord* blk_end) { + // For efficiency, do copy-in/copy-out. + HeapWord* threshold = *threshold_; + size_t index = *index_; + + assert(blk_start != NULL && blk_end > blk_start, + "phantom block"); + assert(blk_end > threshold, "should be past threshold"); + assert(blk_start <= threshold, "blk_start should be at or before threshold") + assert(pointer_delta(threshold, blk_start) <= N_words, + "offset should be <= BlockOffsetSharedArray::N"); + assert(Universe::heap()->is_in_reserved(blk_start), + "reference must be into the heap"); + assert(Universe::heap()->is_in_reserved(blk_end-1), + "limit must be within the heap"); + assert(threshold == _array->_reserved.start() + index*N_words, + "index must agree with threshold"); + + DEBUG_ONLY(size_t orig_index = index;) + + // Mark the card that holds the offset into the block. Note + // that _next_offset_index and _next_offset_threshold are not + // updated until the end of this method. + _array->set_offset_array(index, threshold, blk_start); + + // We need to now mark the subsequent cards that this blk spans. + + // Index of card on which blk ends. + size_t end_index = _array->index_for(blk_end - 1); + + // Are there more cards left to be updated? + if (index + 1 <= end_index) { + HeapWord* rem_st = _array->address_for_index(index + 1); + // Calculate rem_end this way because end_index + // may be the last valid index in the covered region. + HeapWord* rem_end = _array->address_for_index(end_index) + N_words; + set_remainder_to_point_to_start(rem_st, rem_end); + } + + index = end_index + 1; + // Calculate threshold_ this way because end_index + // may be the last valid index in the covered region. + threshold = _array->address_for_index(end_index) + N_words; + assert(threshold >= blk_end, "Incorrect offset threshold"); + + // index_ and threshold_ updated here. + *threshold_ = threshold; + *index_ = index; + +#ifdef ASSERT + // The offset can be 0 if the block starts on a boundary. That + // is checked by an assertion above. 
+ size_t start_index = _array->index_for(blk_start); + HeapWord* boundary = _array->address_for_index(start_index); + assert((_array->offset_array(orig_index) == 0 && + blk_start == boundary) || + (_array->offset_array(orig_index) > 0 && + _array->offset_array(orig_index) <= N_words), + "offset array should have been set"); + for (size_t j = orig_index + 1; j <= end_index; j++) { + assert(_array->offset_array(j) > 0 && + _array->offset_array(j) <= + (u_char) (N_words+BlockOffsetArray::N_powers-1), + "offset array should have been set"); + } +#endif +} + +////////////////////////////////////////////////////////////////////// +// G1BlockOffsetArrayContigSpace +////////////////////////////////////////////////////////////////////// + +HeapWord* +G1BlockOffsetArrayContigSpace::block_start_unsafe(const void* addr) { + assert(_bottom <= addr && addr < _end, + "addr must be covered by this Array"); + HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1); + return forward_to_block_containing_addr(q, addr); +} + +HeapWord* +G1BlockOffsetArrayContigSpace:: +block_start_unsafe_const(const void* addr) const { + assert(_bottom <= addr && addr < _end, + "addr must be covered by this Array"); + HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1); + HeapWord* n = q + _sp->block_size(q); + return forward_to_block_containing_addr_const(q, n, addr); +} + +G1BlockOffsetArrayContigSpace:: +G1BlockOffsetArrayContigSpace(G1BlockOffsetSharedArray* array, + MemRegion mr) : + G1BlockOffsetArray(array, mr, true) +{ + _next_offset_threshold = NULL; + _next_offset_index = 0; +} + +HeapWord* G1BlockOffsetArrayContigSpace::initialize_threshold() { + assert(!Universe::heap()->is_in_reserved(_array->_offset_array), + "just checking"); + _next_offset_index = _array->index_for(_bottom); + _next_offset_index++; + _next_offset_threshold = + _array->address_for_index(_next_offset_index); + return _next_offset_threshold; +} + +void G1BlockOffsetArrayContigSpace::zero_bottom_entry() { + assert(!Universe::heap()->is_in_reserved(_array->_offset_array), + "just checking"); + size_t bottom_index = _array->index_for(_bottom); + assert(_array->address_for_index(bottom_index) == _bottom, + "Precondition of call"); + _array->set_offset_array(bottom_index, 0); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,487 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// The CollectedHeap type requires subtypes to implement a method +// "block_start". For some subtypes, notably generational +// systems using card-table-based write barriers, the efficiency of this +// operation may be important. Implementations of the "BlockOffsetArray" +// class may be useful in providing such efficient implementations. +// +// While generally mirroring the structure of the BOT for GenCollectedHeap, +// the following types are tailored more towards G1's uses; these should, +// however, be merged back into a common BOT to avoid code duplication +// and reduce maintenance overhead. +// +// G1BlockOffsetTable (abstract) +// -- G1BlockOffsetArray (uses G1BlockOffsetSharedArray) +// -- G1BlockOffsetArrayContigSpace +// +// A main impediment to the consolidation of this code might be the +// effect of making some of the block_start*() calls non-const as +// below. Whether that might adversely affect performance optimizations +// that compilers might normally perform in the case of non-G1 +// collectors needs to be carefully investigated prior to any such +// consolidation. + +// Forward declarations +class ContiguousSpace; +class G1BlockOffsetSharedArray; + +class G1BlockOffsetTable VALUE_OBJ_CLASS_SPEC { + friend class VMStructs; +protected: + // These members describe the region covered by the table. + + // The space this table is covering. + HeapWord* _bottom; // == reserved.start + HeapWord* _end; // End of currently allocated region. + +public: + // Initialize the table to cover the given space. + // The contents of the initial table are undefined. + G1BlockOffsetTable(HeapWord* bottom, HeapWord* end) : + _bottom(bottom), _end(end) + { + assert(_bottom <= _end, "arguments out of order"); + } + + // Note that the committed size of the covered space may have changed, + // so the table size might also wish to change. + virtual void resize(size_t new_word_size) = 0; + + virtual void set_bottom(HeapWord* new_bottom) { + assert(new_bottom <= _end, "new_bottom > _end"); + _bottom = new_bottom; + resize(pointer_delta(_end, _bottom)); + } + + // Requires "addr" to be contained by a block, and returns the address of + // the start of that block. (May have side effects, namely updating of + // shared array entries that "point" too far backwards. This can occur, + // for example, when LAB allocation is used in a space covered by the + // table.) + virtual HeapWord* block_start_unsafe(const void* addr) = 0; + // Same as above, but does not have any of the possible side effects + // discussed above. + virtual HeapWord* block_start_unsafe_const(const void* addr) const = 0; + + // Returns the address of the start of the block containing "addr", or + // else "null" if it is covered by no block. (May have side effects, + // namely updating of shared array entries that "point" too far + // backwards. This can occur, for example, when lab allocation is used + // in a space covered by the table.) + inline HeapWord* block_start(const void* addr); + // Same as above, but does not have any of the possible side effects + // discussed above. + inline HeapWord* block_start_const(const void* addr) const; +}; + +// This implementation of "G1BlockOffsetTable" divides the covered region +// into "N"-word subregions (where "N" = 2^"LogN". 
An array with an entry +// for each such subregion indicates how far back one must go to find the +// start of the chunk that includes the first word of the subregion. +// +// Each BlockOffsetArray is owned by a Space. However, the actual array +// may be shared by several BlockOffsetArrays; this is useful +// when a single resizable area (such as a generation) is divided up into +// several spaces in which contiguous allocation takes place, +// such as, for example, in G1 or in the train generation.) + +// Here is the shared array type. + +class G1BlockOffsetSharedArray: public CHeapObj { + friend class G1BlockOffsetArray; + friend class G1BlockOffsetArrayContigSpace; + friend class VMStructs; + +private: + // The reserved region covered by the shared array. + MemRegion _reserved; + + // End of the current committed region. + HeapWord* _end; + + // Array for keeping offsets for retrieving object start fast given an + // address. + VirtualSpace _vs; + u_char* _offset_array; // byte array keeping backwards offsets + + // Bounds checking accessors: + // For performance these have to devolve to array accesses in product builds. + u_char offset_array(size_t index) const { + assert(index < _vs.committed_size(), "index out of range"); + return _offset_array[index]; + } + + void set_offset_array(size_t index, u_char offset) { + assert(index < _vs.committed_size(), "index out of range"); + assert(offset <= N_words, "offset too large"); + _offset_array[index] = offset; + } + + void set_offset_array(size_t index, HeapWord* high, HeapWord* low) { + assert(index < _vs.committed_size(), "index out of range"); + assert(high >= low, "addresses out of order"); + assert(pointer_delta(high, low) <= N_words, "offset too large"); + _offset_array[index] = (u_char) pointer_delta(high, low); + } + + void set_offset_array(HeapWord* left, HeapWord* right, u_char offset) { + assert(index_for(right - 1) < _vs.committed_size(), + "right address out of range"); + assert(left < right, "Heap addresses out of order"); + size_t num_cards = pointer_delta(right, left) >> LogN_words; + memset(&_offset_array[index_for(left)], offset, num_cards); + } + + void set_offset_array(size_t left, size_t right, u_char offset) { + assert(right < _vs.committed_size(), "right address out of range"); + assert(left <= right, "indexes out of order"); + size_t num_cards = right - left + 1; + memset(&_offset_array[left], offset, num_cards); + } + + void check_offset_array(size_t index, HeapWord* high, HeapWord* low) const { + assert(index < _vs.committed_size(), "index out of range"); + assert(high >= low, "addresses out of order"); + assert(pointer_delta(high, low) <= N_words, "offset too large"); + assert(_offset_array[index] == pointer_delta(high, low), + "Wrong offset"); + } + + bool is_card_boundary(HeapWord* p) const; + + // Return the number of slots needed for an offset array + // that covers mem_region_words words. + // We always add an extra slot because if an object + // ends on a card boundary we put a 0 in the next + // offset array slot, so we want that slot always + // to be reserved. + + size_t compute_size(size_t mem_region_words) { + size_t number_of_slots = (mem_region_words / N_words) + 1; + return ReservedSpace::page_align_size_up(number_of_slots); + } + +public: + enum SomePublicConstants { + LogN = 9, + LogN_words = LogN - LogHeapWordSize, + N_bytes = 1 << LogN, + N_words = 1 << LogN_words + }; + + // Initialize the table to cover from "base" to (at least) + // "base + init_word_size". 
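compute_size() above works out to one byte of offset table per N_words-word card, plus one spare slot for a block that ends exactly on a card boundary, rounded up to the reservation granularity. A back-of-the-envelope version with assumed values (64-bit HeapWords, 512-byte cards as in the enum above, and a 4 KB page standing in for ReservedSpace's rounding): a 1 GB heap needs roughly 2 MB of table.

#include <cstddef>

const size_t kHeapWordBytes = 8;                 // assumed 64-bit HeapWord
const size_t kNWords = 512 / kHeapWordBytes;     // words per card (N_words)
const size_t kPage = 4096;                       // assumed page size

size_t bot_bytes_for(size_t heap_words) {
  size_t slots = heap_words / kNWords + 1;       // one u_char per card, +1 spare
  return (slots + kPage - 1) / kPage * kPage;    // page-align upward
}

// e.g. a 1 GB heap: bot_bytes_for((1u << 30) / kHeapWordBytes) == 2 MB + 4 KB.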
In the future, the table may be expanded + // (see "resize" below) up to the size of "_reserved" (which must be at + // least "init_word_size".) The contents of the initial table are + // undefined; it is the responsibility of the constituent + // G1BlockOffsetTable(s) to initialize cards. + G1BlockOffsetSharedArray(MemRegion reserved, size_t init_word_size); + + // Notes a change in the committed size of the region covered by the + // table. The "new_word_size" may not be larger than the size of the + // reserved region this table covers. + void resize(size_t new_word_size); + + void set_bottom(HeapWord* new_bottom); + + // Updates all the BlockOffsetArray's sharing this shared array to + // reflect the current "top"'s of their spaces. + void update_offset_arrays(); + + // Return the appropriate index into "_offset_array" for "p". + inline size_t index_for(const void* p) const; + + // Return the address indicating the start of the region corresponding to + // "index" in "_offset_array". + inline HeapWord* address_for_index(size_t index) const; +}; + +// And here is the G1BlockOffsetTable subtype that uses the array. + +class G1BlockOffsetArray: public G1BlockOffsetTable { + friend class G1BlockOffsetSharedArray; + friend class G1BlockOffsetArrayContigSpace; + friend class VMStructs; +private: + enum SomePrivateConstants { + N_words = G1BlockOffsetSharedArray::N_words, + LogN = G1BlockOffsetSharedArray::LogN + }; + + // The following enums are used by do_block_helper + enum Action { + Action_single, // BOT records a single block (see single_block()) + Action_mark, // BOT marks the start of a block (see mark_block()) + Action_check // Check that BOT records block correctly + // (see verify_single_block()). + }; + + // This is the array, which can be shared by several BlockOffsetArray's + // servicing different + G1BlockOffsetSharedArray* _array; + + // The space that owns this subregion. + Space* _sp; + + // If "_sp" is a contiguous space, the field below is the view of "_sp" + // as a contiguous space, else NULL. + ContiguousSpace* _csp; + + // If true, array entries are initialized to 0; otherwise, they are + // initialized to point backwards to the beginning of the covered region. + bool _init_to_zero; + + // The portion [_unallocated_block, _sp.end()) of the space that + // is a single block known not to contain any objects. + // NOTE: See BlockOffsetArrayUseUnallocatedBlock flag. + HeapWord* _unallocated_block; + + // Sets the entries + // corresponding to the cards starting at "start" and ending at "end" + // to point back to the card before "start": the interval [start, end) + // is right-open. + void set_remainder_to_point_to_start(HeapWord* start, HeapWord* end); + // Same as above, except that the args here are a card _index_ interval + // that is closed: [start_index, end_index] + void set_remainder_to_point_to_start_incl(size_t start, size_t end); + + // A helper function for BOT adjustment/verification work + void do_block_internal(HeapWord* blk_start, HeapWord* blk_end, Action action); + +protected: + + ContiguousSpace* csp() const { return _csp; } + + // Returns the address of a block whose start is at most "addr". + // If "has_max_index" is true, "assumes "max_index" is the last valid one + // in the array. + inline HeapWord* block_at_or_preceding(const void* addr, + bool has_max_index, + size_t max_index) const; + + // "q" is a block boundary that is <= "addr"; "n" is the address of the + // next block (or the end of the space.) 
Return the address of the + // beginning of the block that contains "addr". Does so without side + // effects (see, e.g., spec of block_start.) + inline HeapWord* + forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n, + const void* addr) const; + + // "q" is a block boundary that is <= "addr"; return the address of the + // beginning of the block that contains "addr". May have side effects + // on "this", by updating imprecise entries. + inline HeapWord* forward_to_block_containing_addr(HeapWord* q, + const void* addr); + + // "q" is a block boundary that is <= "addr"; "n" is the address of the + // next block (or the end of the space.) Return the address of the + // beginning of the block that contains "addr". May have side effects + // on "this", by updating imprecise entries. + HeapWord* forward_to_block_containing_addr_slow(HeapWord* q, + HeapWord* n, + const void* addr); + + // Requires that "*threshold_" be the first array entry boundary at or + // above "blk_start", and that "*index_" be the corresponding array + // index. If the block starts at or crosses "*threshold_", records + // "blk_start" as the appropriate block start for the array index + // starting at "*threshold_", and for any other indices crossed by the + // block. Updates "*threshold_" and "*index_" to correspond to the first + // index after the block end. + void alloc_block_work2(HeapWord** threshold_, size_t* index_, + HeapWord* blk_start, HeapWord* blk_end); + +public: + // The space may not have it's bottom and top set yet, which is why the + // region is passed as a parameter. If "init_to_zero" is true, the + // elements of the array are initialized to zero. Otherwise, they are + // initialized to point backwards to the beginning. + G1BlockOffsetArray(G1BlockOffsetSharedArray* array, MemRegion mr, + bool init_to_zero); + + // Note: this ought to be part of the constructor, but that would require + // "this" to be passed as a parameter to a member constructor for + // the containing concrete subtype of Space. + // This would be legal C++, but MS VC++ doesn't allow it. + void set_space(Space* sp); + + // Resets the covered region to the given "mr". + void set_region(MemRegion mr); + + // Resets the covered region to one with the same _bottom as before but + // the "new_word_size". + void resize(size_t new_word_size); + + // These must be guaranteed to work properly (i.e., do nothing) + // when "blk_start" ("blk" for second version) is "NULL". + virtual void alloc_block(HeapWord* blk_start, HeapWord* blk_end); + virtual void alloc_block(HeapWord* blk, size_t size) { + alloc_block(blk, blk + size); + } + + // The following methods are useful and optimized for a + // general, non-contiguous space. + + // The given arguments are required to be the starts of adjacent ("blk1" + // before "blk2") well-formed blocks covered by "this". After this call, + // they should be considered to form one block. + virtual void join_blocks(HeapWord* blk1, HeapWord* blk2); + + // Given a block [blk_start, blk_start + full_blk_size), and + // a left_blk_size < full_blk_size, adjust the BOT to show two + // blocks [blk_start, blk_start + left_blk_size) and + // [blk_start + left_blk_size, blk_start + full_blk_size). + // It is assumed (and verified in the non-product VM) that the + // BOT was correct for the original block. + void split_block(HeapWord* blk_start, size_t full_blk_size, + size_t left_blk_size); + + // Adjust the BOT to show that it has a single block in the + // range [blk_start, blk_start + size). 
All necessary BOT + // cards are adjusted, but _unallocated_block isn't. + void single_block(HeapWord* blk_start, HeapWord* blk_end); + void single_block(HeapWord* blk, size_t size) { + single_block(blk, blk + size); + } + + // Adjust BOT to show that it has a block in the range + // [blk_start, blk_start + size). Only the first card + // of BOT is touched. It is assumed (and verified in the + // non-product VM) that the remaining cards of the block + // are correct. + void mark_block(HeapWord* blk_start, HeapWord* blk_end); + void mark_block(HeapWord* blk, size_t size) { + mark_block(blk, blk + size); + } + + // Adjust _unallocated_block to indicate that a particular + // block has been newly allocated or freed. It is assumed (and + // verified in the non-product VM) that the BOT is correct for + // the given block. + inline void allocated(HeapWord* blk_start, HeapWord* blk_end) { + // Verify that the BOT shows [blk, blk + blk_size) to be one block. + verify_single_block(blk_start, blk_end); + if (BlockOffsetArrayUseUnallocatedBlock) { + _unallocated_block = MAX2(_unallocated_block, blk_end); + } + } + + inline void allocated(HeapWord* blk, size_t size) { + allocated(blk, blk + size); + } + + inline void freed(HeapWord* blk_start, HeapWord* blk_end); + + inline void freed(HeapWord* blk, size_t size); + + virtual HeapWord* block_start_unsafe(const void* addr); + virtual HeapWord* block_start_unsafe_const(const void* addr) const; + + // Requires "addr" to be the start of a card and returns the + // start of the block that contains the given address. + HeapWord* block_start_careful(const void* addr) const; + + // If true, initialize array slots with no allocated blocks to zero. + // Otherwise, make them point back to the front. + bool init_to_zero() { return _init_to_zero; } + + // Verification & debugging - ensure that the offset table reflects the fact + // that the block [blk_start, blk_end) or [blk, blk + size) is a + // single block of storage. NOTE: can;t const this because of + // call to non-const do_block_internal() below. + inline void verify_single_block(HeapWord* blk_start, HeapWord* blk_end) { + if (VerifyBlockOffsetArray) { + do_block_internal(blk_start, blk_end, Action_check); + } + } + + inline void verify_single_block(HeapWord* blk, size_t size) { + verify_single_block(blk, blk + size); + } + + // Verify that the given block is before _unallocated_block + inline void verify_not_unallocated(HeapWord* blk_start, + HeapWord* blk_end) const { + if (BlockOffsetArrayUseUnallocatedBlock) { + assert(blk_start < blk_end, "Block inconsistency?"); + assert(blk_end <= _unallocated_block, "_unallocated_block problem"); + } + } + + inline void verify_not_unallocated(HeapWord* blk, size_t size) const { + verify_not_unallocated(blk, blk + size); + } + + void check_all_cards(size_t left_card, size_t right_card) const; +}; + +// A subtype of BlockOffsetArray that takes advantage of the fact +// that its underlying space is a ContiguousSpace, so that its "active" +// region can be more efficiently tracked (than for a non-contiguous space). +class G1BlockOffsetArrayContigSpace: public G1BlockOffsetArray { + friend class VMStructs; + + // allocation boundary at which offset array must be updated + HeapWord* _next_offset_threshold; + size_t _next_offset_index; // index corresponding to that boundary + + // Work function to be called when allocation start crosses the next + // threshold in the contig space. 
+ void alloc_block_work1(HeapWord* blk_start, HeapWord* blk_end) { + alloc_block_work2(&_next_offset_threshold, &_next_offset_index, + blk_start, blk_end); + } + + + public: + G1BlockOffsetArrayContigSpace(G1BlockOffsetSharedArray* array, MemRegion mr); + + // Initialize the threshold to reflect the first boundary after the + // bottom of the covered region. + HeapWord* initialize_threshold(); + + // Zero out the entry for _bottom (offset will be zero). + void zero_bottom_entry(); + + // Return the next threshold, the point at which the table should be + // updated. + HeapWord* threshold() const { return _next_offset_threshold; } + + // These must be guaranteed to work properly (i.e., do nothing) + // when "blk_start" ("blk" for second version) is "NULL". In this + // implementation, that's true because NULL is represented as 0, and thus + // never exceeds the "_next_offset_threshold". + void alloc_block(HeapWord* blk_start, HeapWord* blk_end) { + if (blk_end > _next_offset_threshold) + alloc_block_work1(blk_start, blk_end); + } + void alloc_block(HeapWord* blk, size_t size) { + alloc_block(blk, blk+size); + } + + HeapWord* block_start_unsafe(const void* addr); + HeapWord* block_start_unsafe_const(const void* addr) const; +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,153 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) { + if (addr >= _bottom && addr < _end) { + return block_start_unsafe(addr); + } else { + return NULL; + } +} + +inline HeapWord* +G1BlockOffsetTable::block_start_const(const void* addr) const { + if (addr >= _bottom && addr < _end) { + return block_start_unsafe_const(addr); + } else { + return NULL; + } +} + +inline size_t G1BlockOffsetSharedArray::index_for(const void* p) const { + char* pc = (char*)p; + assert(pc >= (char*)_reserved.start() && + pc < (char*)_reserved.end(), + "p not in range."); + size_t delta = pointer_delta(pc, _reserved.start(), sizeof(char)); + size_t result = delta >> LogN; + assert(result < _vs.committed_size(), "bad index from address"); + return result; +} + +inline HeapWord* +G1BlockOffsetSharedArray::address_for_index(size_t index) const { + assert(index < _vs.committed_size(), "bad index"); + HeapWord* result = _reserved.start() + (index << LogN_words); + assert(result >= _reserved.start() && result < _reserved.end(), + "bad address from index"); + return result; +} + +inline HeapWord* +G1BlockOffsetArray::block_at_or_preceding(const void* addr, + bool has_max_index, + size_t max_index) const { + assert(_array->offset_array(0) == 0, "objects can't cross covered areas"); + size_t index = _array->index_for(addr); + // We must make sure that the offset table entry we use is valid. If + // "addr" is past the end, start at the last known one and go forward. + if (has_max_index) { + index = MIN2(index, max_index); + } + HeapWord* q = _array->address_for_index(index); + + uint offset = _array->offset_array(index); // Extend u_char to uint. + while (offset >= N_words) { + // The excess of the offset from N_words indicates a power of Base + // to go back by. + size_t n_cards_back = BlockOffsetArray::entry_to_cards_back(offset); + q -= (N_words * n_cards_back); + assert(q >= _sp->bottom(), "Went below bottom!"); + index -= n_cards_back; + offset = _array->offset_array(index); + } + assert(offset < N_words, "offset too large"); + q -= offset; + return q; +} + +inline HeapWord* +G1BlockOffsetArray:: +forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n, + const void* addr) const { + if (csp() != NULL) { + if (addr >= csp()->top()) return csp()->top(); + while (n <= addr) { + q = n; + oop obj = oop(q); + if (obj->klass() == NULL) return q; + n += obj->size(); + } + } else { + while (n <= addr) { + q = n; + oop obj = oop(q); + if (obj->klass() == NULL) return q; + n += _sp->block_size(q); + } + } + assert(q <= n, "wrong order for q and addr"); + assert(addr < n, "wrong order for addr and n"); + return q; +} + +inline HeapWord* +G1BlockOffsetArray::forward_to_block_containing_addr(HeapWord* q, + const void* addr) { + if (oop(q)->klass() == NULL) return q; + HeapWord* n = q + _sp->block_size(q); + // In the normal case, where the query "addr" is a card boundary, and the + // offset table chunks are the same size as cards, the block starting at + // "q" will contain addr, so the test below will fail, and we'll fall + // through quickly. 
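// A simplified, standalone sketch of the forwarding walk performed above and
// in the slow path called just below: starting from a known block start
// q <= addr, step block by block until stepping further would pass addr.
// The names and the block_size callback are hypothetical, not the HotSpot
// interfaces.
#include <cassert>
#include <cstddef>

typedef const char* Addr;

Addr find_block_containing(Addr q, Addr addr,
                           size_t (*block_size)(Addr) /* size of block at q */) {
  assert(q <= addr);                 // precondition: q starts at or before addr
  Addr n = q + block_size(q);        // start of the block after q
  while (n <= addr) {                // addr lies in a later block: advance
    q = n;
    n = q + block_size(q);
  }
  // Here q <= addr < n, so q is the start of the block containing addr.
  return q;
}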
+ if (n <= addr) { + q = forward_to_block_containing_addr_slow(q, n, addr); + } + assert(q <= addr, "wrong order for current and arg"); + return q; +} + +////////////////////////////////////////////////////////////////////////// +// BlockOffsetArrayNonContigSpace inlines +////////////////////////////////////////////////////////////////////////// +inline void G1BlockOffsetArray::freed(HeapWord* blk_start, HeapWord* blk_end) { + // Verify that the BOT shows [blk_start, blk_end) to be one block. + verify_single_block(blk_start, blk_end); + // adjust _unallocated_block upward or downward + // as appropriate + if (BlockOffsetArrayUseUnallocatedBlock) { + assert(_unallocated_block <= _end, + "Inconsistent value for _unallocated_block"); + if (blk_end >= _unallocated_block && blk_start <= _unallocated_block) { + // CMS-specific note: a block abutting _unallocated_block to + // its left is being freed, a new block is being added or + // we are resetting following a compaction + _unallocated_block = blk_start; + } + } +} + +inline void G1BlockOffsetArray::freed(HeapWord* blk, size_t size) { + freed(blk, blk + size); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,5355 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1CollectedHeap.cpp.incl" + +// turn it on so that the contents of the young list (scan-only / +// to-be-collected) are printed at "strategic" points before / during +// / after the collection --- this is useful for debugging +#define SCAN_ONLY_VERBOSE 0 +// CURRENT STATUS +// This file is under construction. Search for "FIXME". + +// INVARIANTS/NOTES +// +// All allocation activity covered by the G1CollectedHeap interface is +// serialized by acquiring the HeapLock. This happens in +// mem_allocate_work, which all such allocation functions call. +// (Note that this does not apply to TLAB allocation, which is not part +// of this interface: it is done by clients of this interface.) + +// Local to this file. + +// Finds the first HeapRegion. +// No longer used, but might be handy someday. 
+ +class FindFirstRegionClosure: public HeapRegionClosure { + HeapRegion* _a_region; +public: + FindFirstRegionClosure() : _a_region(NULL) {} + bool doHeapRegion(HeapRegion* r) { + _a_region = r; + return true; + } + HeapRegion* result() { return _a_region; } +}; + + +class RefineCardTableEntryClosure: public CardTableEntryClosure { + SuspendibleThreadSet* _sts; + G1RemSet* _g1rs; + ConcurrentG1Refine* _cg1r; + bool _concurrent; +public: + RefineCardTableEntryClosure(SuspendibleThreadSet* sts, + G1RemSet* g1rs, + ConcurrentG1Refine* cg1r) : + _sts(sts), _g1rs(g1rs), _cg1r(cg1r), _concurrent(true) + {} + bool do_card_ptr(jbyte* card_ptr, int worker_i) { + _g1rs->concurrentRefineOneCard(card_ptr, worker_i); + if (_concurrent && _sts->should_yield()) { + // Caller will actually yield. + return false; + } + // Otherwise, we finished successfully; return true. + return true; + } + void set_concurrent(bool b) { _concurrent = b; } +}; + + +class ClearLoggedCardTableEntryClosure: public CardTableEntryClosure { + int _calls; + G1CollectedHeap* _g1h; + CardTableModRefBS* _ctbs; + int _histo[256]; +public: + ClearLoggedCardTableEntryClosure() : + _calls(0) + { + _g1h = G1CollectedHeap::heap(); + _ctbs = (CardTableModRefBS*)_g1h->barrier_set(); + for (int i = 0; i < 256; i++) _histo[i] = 0; + } + bool do_card_ptr(jbyte* card_ptr, int worker_i) { + if (_g1h->is_in_reserved(_ctbs->addr_for(card_ptr))) { + _calls++; + unsigned char* ujb = (unsigned char*)card_ptr; + int ind = (int)(*ujb); + _histo[ind]++; + *card_ptr = -1; + } + return true; + } + int calls() { return _calls; } + void print_histo() { + gclog_or_tty->print_cr("Card table value histogram:"); + for (int i = 0; i < 256; i++) { + if (_histo[i] != 0) { + gclog_or_tty->print_cr(" %d: %d", i, _histo[i]); + } + } + } +}; + +class RedirtyLoggedCardTableEntryClosure: public CardTableEntryClosure { + int _calls; + G1CollectedHeap* _g1h; + CardTableModRefBS* _ctbs; +public: + RedirtyLoggedCardTableEntryClosure() : + _calls(0) + { + _g1h = G1CollectedHeap::heap(); + _ctbs = (CardTableModRefBS*)_g1h->barrier_set(); + } + bool do_card_ptr(jbyte* card_ptr, int worker_i) { + if (_g1h->is_in_reserved(_ctbs->addr_for(card_ptr))) { + _calls++; + *card_ptr = 0; + } + return true; + } + int calls() { return _calls; } +}; + +YoungList::YoungList(G1CollectedHeap* g1h) + : _g1h(g1h), _head(NULL), + _scan_only_head(NULL), _scan_only_tail(NULL), _curr_scan_only(NULL), + _length(0), _scan_only_length(0), + _last_sampled_rs_lengths(0), + _survivor_head(NULL), _survivors_tail(NULL), _survivor_length(0) +{ + guarantee( check_list_empty(false), "just making sure..." 
); +} + +void YoungList::push_region(HeapRegion *hr) { + assert(!hr->is_young(), "should not already be young"); + assert(hr->get_next_young_region() == NULL, "cause it should!"); + + hr->set_next_young_region(_head); + _head = hr; + + hr->set_young(); + double yg_surv_rate = _g1h->g1_policy()->predict_yg_surv_rate((int)_length); + ++_length; +} + +void YoungList::add_survivor_region(HeapRegion* hr) { + assert(!hr->is_survivor(), "should not already be for survived"); + assert(hr->get_next_young_region() == NULL, "cause it should!"); + + hr->set_next_young_region(_survivor_head); + if (_survivor_head == NULL) { + _survivors_tail = hr; + } + _survivor_head = hr; + + hr->set_survivor(); + ++_survivor_length; +} + +HeapRegion* YoungList::pop_region() { + while (_head != NULL) { + assert( length() > 0, "list should not be empty" ); + HeapRegion* ret = _head; + _head = ret->get_next_young_region(); + ret->set_next_young_region(NULL); + --_length; + assert(ret->is_young(), "region should be very young"); + + // Replace 'Survivor' region type with 'Young'. So the region will + // be treated as a young region and will not be 'confused' with + // newly created survivor regions. + if (ret->is_survivor()) { + ret->set_young(); + } + + if (!ret->is_scan_only()) { + return ret; + } + + // scan-only, we'll add it to the scan-only list + if (_scan_only_tail == NULL) { + guarantee( _scan_only_head == NULL, "invariant" ); + + _scan_only_head = ret; + _curr_scan_only = ret; + } else { + guarantee( _scan_only_head != NULL, "invariant" ); + _scan_only_tail->set_next_young_region(ret); + } + guarantee( ret->get_next_young_region() == NULL, "invariant" ); + _scan_only_tail = ret; + + // no need to be tagged as scan-only any more + ret->set_young(); + + ++_scan_only_length; + } + assert( length() == 0, "list should be empty" ); + return NULL; +} + +void YoungList::empty_list(HeapRegion* list) { + while (list != NULL) { + HeapRegion* next = list->get_next_young_region(); + list->set_next_young_region(NULL); + list->uninstall_surv_rate_group(); + list->set_not_young(); + list = next; + } +} + +void YoungList::empty_list() { + assert(check_list_well_formed(), "young list should be well formed"); + + empty_list(_head); + _head = NULL; + _length = 0; + + empty_list(_scan_only_head); + _scan_only_head = NULL; + _scan_only_tail = NULL; + _scan_only_length = 0; + _curr_scan_only = NULL; + + empty_list(_survivor_head); + _survivor_head = NULL; + _survivors_tail = NULL; + _survivor_length = 0; + + _last_sampled_rs_lengths = 0; + + assert(check_list_empty(false), "just making sure..."); +} + +bool YoungList::check_list_well_formed() { + bool ret = true; + + size_t length = 0; + HeapRegion* curr = _head; + HeapRegion* last = NULL; + while (curr != NULL) { + if (!curr->is_young() || curr->is_scan_only()) { + gclog_or_tty->print_cr("### YOUNG REGION "PTR_FORMAT"-"PTR_FORMAT" " + "incorrectly tagged (%d, %d)", + curr->bottom(), curr->end(), + curr->is_young(), curr->is_scan_only()); + ret = false; + } + ++length; + last = curr; + curr = curr->get_next_young_region(); + } + ret = ret && (length == _length); + + if (!ret) { + gclog_or_tty->print_cr("### YOUNG LIST seems not well formed!"); + gclog_or_tty->print_cr("### list has %d entries, _length is %d", + length, _length); + } + + bool scan_only_ret = true; + length = 0; + curr = _scan_only_head; + last = NULL; + while (curr != NULL) { + if (!curr->is_young() || curr->is_scan_only()) { + gclog_or_tty->print_cr("### SCAN-ONLY REGION "PTR_FORMAT"-"PTR_FORMAT" " + "incorrectly 
tagged (%d, %d)", + curr->bottom(), curr->end(), + curr->is_young(), curr->is_scan_only()); + scan_only_ret = false; + } + ++length; + last = curr; + curr = curr->get_next_young_region(); + } + scan_only_ret = scan_only_ret && (length == _scan_only_length); + + if ( (last != _scan_only_tail) || + (_scan_only_head == NULL && _scan_only_tail != NULL) || + (_scan_only_head != NULL && _scan_only_tail == NULL) ) { + gclog_or_tty->print_cr("## _scan_only_tail is set incorrectly"); + scan_only_ret = false; + } + + if (_curr_scan_only != NULL && _curr_scan_only != _scan_only_head) { + gclog_or_tty->print_cr("### _curr_scan_only is set incorrectly"); + scan_only_ret = false; + } + + if (!scan_only_ret) { + gclog_or_tty->print_cr("### SCAN-ONLY LIST seems not well formed!"); + gclog_or_tty->print_cr("### list has %d entries, _scan_only_length is %d", + length, _scan_only_length); + } + + return ret && scan_only_ret; +} + +bool YoungList::check_list_empty(bool ignore_scan_only_list, + bool check_sample) { + bool ret = true; + + if (_length != 0) { + gclog_or_tty->print_cr("### YOUNG LIST should have 0 length, not %d", + _length); + ret = false; + } + if (check_sample && _last_sampled_rs_lengths != 0) { + gclog_or_tty->print_cr("### YOUNG LIST has non-zero last sampled RS lengths"); + ret = false; + } + if (_head != NULL) { + gclog_or_tty->print_cr("### YOUNG LIST does not have a NULL head"); + ret = false; + } + if (!ret) { + gclog_or_tty->print_cr("### YOUNG LIST does not seem empty"); + } + + if (ignore_scan_only_list) + return ret; + + bool scan_only_ret = true; + if (_scan_only_length != 0) { + gclog_or_tty->print_cr("### SCAN-ONLY LIST should have 0 length, not %d", + _scan_only_length); + scan_only_ret = false; + } + if (_scan_only_head != NULL) { + gclog_or_tty->print_cr("### SCAN-ONLY LIST does not have a NULL head"); + scan_only_ret = false; + } + if (_scan_only_tail != NULL) { + gclog_or_tty->print_cr("### SCAN-ONLY LIST does not have a NULL tail"); + scan_only_ret = false; + } + if (!scan_only_ret) { + gclog_or_tty->print_cr("### SCAN-ONLY LIST does not seem empty"); + } + + return ret && scan_only_ret; +} + +void +YoungList::rs_length_sampling_init() { + _sampled_rs_lengths = 0; + _curr = _head; +} + +bool +YoungList::rs_length_sampling_more() { + return _curr != NULL; +} + +void +YoungList::rs_length_sampling_next() { + assert( _curr != NULL, "invariant" ); + _sampled_rs_lengths += _curr->rem_set()->occupied(); + _curr = _curr->get_next_young_region(); + if (_curr == NULL) { + _last_sampled_rs_lengths = _sampled_rs_lengths; + // gclog_or_tty->print_cr("last sampled RS lengths = %d", _last_sampled_rs_lengths); + } +} + +void +YoungList::reset_auxilary_lists() { + // We could have just "moved" the scan-only list to the young list. + // However, the scan-only list is ordered according to the region + // age in descending order, so, by moving one entry at a time, we + // ensure that it is recreated in ascending order. + + guarantee( is_empty(), "young list should be empty" ); + assert(check_list_well_formed(), "young list should be well formed"); + + // Add survivor regions to SurvRateGroup. 
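// A simplified, standalone sketch of the relinking that follows: the survivor
// list is placed at the front of the young list and the scan-only list is
// appended behind it, purely by rewiring the intrusive "next" pointers, never
// by copying nodes.  Node and next are hypothetical stand-ins for HeapRegion
// and its next-young-region link.
#include <cstddef>

struct Node { Node* next; };

// Head of the combined list "first followed by second"; either may be empty.
Node* splice(Node* first_head, Node* first_tail, Node* second_head) {
  if (first_head == NULL) {
    return second_head;              // nothing to put in front
  }
  first_tail->next = second_head;    // hook the second list onto the tail
  return first_head;
}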
+ _g1h->g1_policy()->note_start_adding_survivor_regions(); + for (HeapRegion* curr = _survivor_head; + curr != NULL; + curr = curr->get_next_young_region()) { + _g1h->g1_policy()->set_region_survivors(curr); + } + _g1h->g1_policy()->note_stop_adding_survivor_regions(); + + if (_survivor_head != NULL) { + _head = _survivor_head; + _length = _survivor_length + _scan_only_length; + _survivors_tail->set_next_young_region(_scan_only_head); + } else { + _head = _scan_only_head; + _length = _scan_only_length; + } + + for (HeapRegion* curr = _scan_only_head; + curr != NULL; + curr = curr->get_next_young_region()) { + curr->recalculate_age_in_surv_rate_group(); + } + _scan_only_head = NULL; + _scan_only_tail = NULL; + _scan_only_length = 0; + _curr_scan_only = NULL; + + _survivor_head = NULL; + _survivors_tail = NULL; + _survivor_length = 0; + _g1h->g1_policy()->finished_recalculating_age_indexes(); + + assert(check_list_well_formed(), "young list should be well formed"); +} + +void YoungList::print() { + HeapRegion* lists[] = {_head, _scan_only_head, _survivor_head}; + const char* names[] = {"YOUNG", "SCAN-ONLY", "SURVIVOR"}; + + for (unsigned int list = 0; list < ARRAY_SIZE(lists); ++list) { + gclog_or_tty->print_cr("%s LIST CONTENTS", names[list]); + HeapRegion *curr = lists[list]; + if (curr == NULL) + gclog_or_tty->print_cr(" empty"); + while (curr != NULL) { + gclog_or_tty->print_cr(" [%08x-%08x], t: %08x, P: %08x, N: %08x, C: %08x, " + "age: %4d, y: %d, s-o: %d, surv: %d", + curr->bottom(), curr->end(), + curr->top(), + curr->prev_top_at_mark_start(), + curr->next_top_at_mark_start(), + curr->top_at_conc_mark_count(), + curr->age_in_surv_rate_group_cond(), + curr->is_young(), + curr->is_scan_only(), + curr->is_survivor()); + curr = curr->get_next_young_region(); + } + } + + gclog_or_tty->print_cr(""); +} + +void G1CollectedHeap::stop_conc_gc_threads() { + _cg1r->cg1rThread()->stop(); + _czft->stop(); + _cmThread->stop(); +} + + +void G1CollectedHeap::check_ct_logs_at_safepoint() { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + CardTableModRefBS* ct_bs = (CardTableModRefBS*)barrier_set(); + + // Count the dirty cards at the start. + CountNonCleanMemRegionClosure count1(this); + ct_bs->mod_card_iterate(&count1); + int orig_count = count1.n(); + + // First clear the logged cards. + ClearLoggedCardTableEntryClosure clear; + dcqs.set_closure(&clear); + dcqs.apply_closure_to_all_completed_buffers(); + dcqs.iterate_closure_all_threads(false); + clear.print_histo(); + + // Now ensure that there's no dirty cards. 
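// A simplified, standalone sketch of the invariant the checks below rely on:
// clearing every logged card and then re-dirtying exactly the same set takes
// the same number of operations and leaves the table no cleaner than before.
// The clean/dirty byte values here are made up, not the real card-table
// constants.
#include <cassert>
#include <vector>

void clear_then_redirty(std::vector<signed char>& cards,
                        signed char clean_val, signed char dirty_val) {
  std::vector<size_t> logged;
  for (size_t i = 0; i < cards.size(); ++i) {
    if (cards[i] == dirty_val) {
      logged.push_back(i);           // remember it, as the completed buffers do
      cards[i] = clean_val;          // "clear" pass
    }
  }
  size_t redirtied = 0;
  for (size_t k = 0; k < logged.size(); ++k) {
    cards[logged[k]] = dirty_val;    // "redirty" pass over the same entries
    ++redirtied;
  }
  assert(redirtied == logged.size()); // mirrors redirty.calls() == clear.calls()
}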
+ CountNonCleanMemRegionClosure count2(this); + ct_bs->mod_card_iterate(&count2); + if (count2.n() != 0) { + gclog_or_tty->print_cr("Card table has %d entries; %d originally", + count2.n(), orig_count); + } + guarantee(count2.n() == 0, "Card table should be clean."); + + RedirtyLoggedCardTableEntryClosure redirty; + JavaThread::dirty_card_queue_set().set_closure(&redirty); + dcqs.apply_closure_to_all_completed_buffers(); + dcqs.iterate_closure_all_threads(false); + gclog_or_tty->print_cr("Log entries = %d, dirty cards = %d.", + clear.calls(), orig_count); + guarantee(redirty.calls() == clear.calls(), + "Or else mechanism is broken."); + + CountNonCleanMemRegionClosure count3(this); + ct_bs->mod_card_iterate(&count3); + if (count3.n() != orig_count) { + gclog_or_tty->print_cr("Should have restored them all: orig = %d, final = %d.", + orig_count, count3.n()); + guarantee(count3.n() >= orig_count, "Should have restored them all."); + } + + JavaThread::dirty_card_queue_set().set_closure(_refine_cte_cl); +} + +// Private class members. + +G1CollectedHeap* G1CollectedHeap::_g1h; + +// Private methods. + +// Finds a HeapRegion that can be used to allocate a given size of block. + + +HeapRegion* G1CollectedHeap::newAllocRegion_work(size_t word_size, + bool do_expand, + bool zero_filled) { + ConcurrentZFThread::note_region_alloc(); + HeapRegion* res = alloc_free_region_from_lists(zero_filled); + if (res == NULL && do_expand) { + expand(word_size * HeapWordSize); + res = alloc_free_region_from_lists(zero_filled); + assert(res == NULL || + (!res->isHumongous() && + (!zero_filled || + res->zero_fill_state() == HeapRegion::Allocated)), + "Alloc Regions must be zero filled (and non-H)"); + } + if (res != NULL && res->is_empty()) _free_regions--; + assert(res == NULL || + (!res->isHumongous() && + (!zero_filled || + res->zero_fill_state() == HeapRegion::Allocated)), + "Non-young alloc Regions must be zero filled (and non-H)"); + + if (G1TraceRegions) { + if (res != NULL) { + gclog_or_tty->print_cr("new alloc region %d:["PTR_FORMAT", "PTR_FORMAT"], " + "top "PTR_FORMAT, + res->hrs_index(), res->bottom(), res->end(), res->top()); + } + } + + return res; +} + +HeapRegion* G1CollectedHeap::newAllocRegionWithExpansion(int purpose, + size_t word_size, + bool zero_filled) { + HeapRegion* alloc_region = NULL; + if (_gc_alloc_region_counts[purpose] < g1_policy()->max_regions(purpose)) { + alloc_region = newAllocRegion_work(word_size, true, zero_filled); + if (purpose == GCAllocForSurvived && alloc_region != NULL) { + _young_list->add_survivor_region(alloc_region); + } + ++_gc_alloc_region_counts[purpose]; + } else { + g1_policy()->note_alloc_region_limit_reached(purpose); + } + return alloc_region; +} + +// If could fit into free regions w/o expansion, try. +// Otherwise, if can expand, do so. +// Otherwise, if using ex regions might help, try with ex given back. +HeapWord* G1CollectedHeap::humongousObjAllocate(size_t word_size) { + assert(regions_accounted_for(), "Region leakage!"); + + // We can't allocate H regions while cleanupComplete is running, since + // some of the regions we find to be empty might not yet be added to the + // unclean list. (If we're already at a safepoint, this call is + // unnecessary, not to mention wrong.) + if (!SafepointSynchronize::is_at_safepoint()) + wait_for_cleanup_complete(); + + size_t num_regions = + round_to(word_size, HeapRegion::GrainWords) / HeapRegion::GrainWords; + + // Special case if < one region??? + + // Remember the ft size. 
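// A simplified, standalone sketch of the region count computed above:
// round_to(word_size, GrainWords) / GrainWords is a ceiling division, i.e.
// the number of whole regions needed to hold word_size words.  grain_words
// here is a hypothetical region size in words.
#include <cstddef>

size_t regions_needed(size_t word_size, size_t grain_words) {
  return (word_size + grain_words - 1) / grain_words;  // ceil(word_size / grain_words)
}
// With grain_words == 1024: regions_needed(1, 1024) == 1,
// regions_needed(1024, 1024) == 1 and regions_needed(1025, 1024) == 2.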
+ size_t x_size = expansion_regions(); + + HeapWord* res = NULL; + bool eliminated_allocated_from_lists = false; + + // Can the allocation potentially fit in the free regions? + if (free_regions() >= num_regions) { + res = _hrs->obj_allocate(word_size); + } + if (res == NULL) { + // Try expansion. + size_t fs = _hrs->free_suffix(); + if (fs + x_size >= num_regions) { + expand((num_regions - fs) * HeapRegion::GrainBytes); + res = _hrs->obj_allocate(word_size); + assert(res != NULL, "This should have worked."); + } else { + // Expansion won't help. Are there enough free regions if we get rid + // of reservations? + size_t avail = free_regions(); + if (avail >= num_regions) { + res = _hrs->obj_allocate(word_size); + if (res != NULL) { + remove_allocated_regions_from_lists(); + eliminated_allocated_from_lists = true; + } + } + } + } + if (res != NULL) { + // Increment by the number of regions allocated. + // FIXME: Assumes regions all of size GrainBytes. +#ifndef PRODUCT + mr_bs()->verify_clean_region(MemRegion(res, res + num_regions * + HeapRegion::GrainWords)); +#endif + if (!eliminated_allocated_from_lists) + remove_allocated_regions_from_lists(); + _summary_bytes_used += word_size * HeapWordSize; + _free_regions -= num_regions; + _num_humongous_regions += (int) num_regions; + } + assert(regions_accounted_for(), "Region Leakage"); + return res; +} + +HeapWord* +G1CollectedHeap::attempt_allocation_slow(size_t word_size, + bool permit_collection_pause) { + HeapWord* res = NULL; + HeapRegion* allocated_young_region = NULL; + + assert( SafepointSynchronize::is_at_safepoint() || + Heap_lock->owned_by_self(), "pre condition of the call" ); + + if (isHumongous(word_size)) { + // Allocation of a humongous object can, in a sense, complete a + // partial region, if the previous alloc was also humongous, and + // caused the test below to succeed. + if (permit_collection_pause) + do_collection_pause_if_appropriate(word_size); + res = humongousObjAllocate(word_size); + assert(_cur_alloc_region == NULL + || !_cur_alloc_region->isHumongous(), + "Prevent a regression of this bug."); + + } else { + // If we do a collection pause, this will be reset to a non-NULL + // value. If we don't, nulling here ensures that we allocate a new + // region below. + if (_cur_alloc_region != NULL) { + // We're finished with the _cur_alloc_region. + _summary_bytes_used += _cur_alloc_region->used(); + _cur_alloc_region = NULL; + } + assert(_cur_alloc_region == NULL, "Invariant."); + // Completion of a heap region is perhaps a good point at which to do + // a collection pause. + if (permit_collection_pause) + do_collection_pause_if_appropriate(word_size); + // Make sure we have an allocation region available. + if (_cur_alloc_region == NULL) { + if (!SafepointSynchronize::is_at_safepoint()) + wait_for_cleanup_complete(); + bool next_is_young = should_set_young_locked(); + // If the next region is not young, make sure it's zero-filled. + _cur_alloc_region = newAllocRegion(word_size, !next_is_young); + if (_cur_alloc_region != NULL) { + _summary_bytes_used -= _cur_alloc_region->used(); + if (next_is_young) { + set_region_short_lived_locked(_cur_alloc_region); + allocated_young_region = _cur_alloc_region; + } + } + } + assert(_cur_alloc_region == NULL || !_cur_alloc_region->isHumongous(), + "Prevent a regression of this bug."); + + // Now retry the allocation. 
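// A simplified, standalone sketch of the retry performed below for a
// non-humongous request: a bump-pointer allocation inside the (possibly
// freshly installed) current allocation region.  The Region struct and the
// byte-based interface are hypothetical; the real code works in HeapWords
// while holding the Heap_lock or at a safepoint.
#include <cstddef>

struct Region {
  char* top;   // next free byte in the region
  char* end;   // one past the last usable byte
};

// Returns the start of the newly allocated chunk, or NULL if it does not fit.
char* bump_allocate(Region* r, size_t bytes) {
  if (r == NULL || (size_t)(r->end - r->top) < bytes) {
    return NULL;
  }
  char* result = r->top;
  r->top += bytes;
  return result;
}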
+ if (_cur_alloc_region != NULL) { + res = _cur_alloc_region->allocate(word_size); + } + } + + // NOTE: fails frequently in PRT + assert(regions_accounted_for(), "Region leakage!"); + + if (res != NULL) { + if (!SafepointSynchronize::is_at_safepoint()) { + assert( permit_collection_pause, "invariant" ); + assert( Heap_lock->owned_by_self(), "invariant" ); + Heap_lock->unlock(); + } + + if (allocated_young_region != NULL) { + HeapRegion* hr = allocated_young_region; + HeapWord* bottom = hr->bottom(); + HeapWord* end = hr->end(); + MemRegion mr(bottom, end); + ((CardTableModRefBS*)_g1h->barrier_set())->dirty(mr); + } + } + + assert( SafepointSynchronize::is_at_safepoint() || + (res == NULL && Heap_lock->owned_by_self()) || + (res != NULL && !Heap_lock->owned_by_self()), + "post condition of the call" ); + + return res; +} + +HeapWord* +G1CollectedHeap::mem_allocate(size_t word_size, + bool is_noref, + bool is_tlab, + bool* gc_overhead_limit_was_exceeded) { + debug_only(check_for_valid_allocation_state()); + assert(no_gc_in_progress(), "Allocation during gc not allowed"); + HeapWord* result = NULL; + + // Loop until the allocation is satisified, + // or unsatisfied after GC. + for (int try_count = 1; /* return or throw */; try_count += 1) { + int gc_count_before; + { + Heap_lock->lock(); + result = attempt_allocation(word_size); + if (result != NULL) { + // attempt_allocation should have unlocked the heap lock + assert(is_in(result), "result not in heap"); + return result; + } + // Read the gc count while the heap lock is held. + gc_count_before = SharedHeap::heap()->total_collections(); + Heap_lock->unlock(); + } + + // Create the garbage collection operation... + VM_G1CollectForAllocation op(word_size, + gc_count_before); + + // ...and get the VM thread to execute it. + VMThread::execute(&op); + if (op.prologue_succeeded()) { + result = op.result(); + assert(result == NULL || is_in(result), "result not in heap"); + return result; + } + + // Give a warning if we seem to be looping forever. + if ((QueuedAllocationWarningCount > 0) && + (try_count % QueuedAllocationWarningCount == 0)) { + warning("G1CollectedHeap::mem_allocate_work retries %d times", + try_count); + } + } +} + +void G1CollectedHeap::abandon_cur_alloc_region() { + if (_cur_alloc_region != NULL) { + // We're finished with the _cur_alloc_region. + if (_cur_alloc_region->is_empty()) { + _free_regions++; + free_region(_cur_alloc_region); + } else { + _summary_bytes_used += _cur_alloc_region->used(); + } + _cur_alloc_region = NULL; + } +} + +class PostMCRemSetClearClosure: public HeapRegionClosure { + ModRefBarrierSet* _mr_bs; +public: + PostMCRemSetClearClosure(ModRefBarrierSet* mr_bs) : _mr_bs(mr_bs) {} + bool doHeapRegion(HeapRegion* r) { + r->reset_gc_time_stamp(); + if (r->continuesHumongous()) + return false; + HeapRegionRemSet* hrrs = r->rem_set(); + if (hrrs != NULL) hrrs->clear(); + // You might think here that we could clear just the cards + // corresponding to the used region. But no: if we leave a dirty card + // in a region we might allocate into, then it would prevent that card + // from being enqueued, and cause it to be missed. + // Re: the performance cost: we shouldn't be doing full GC anyway! 
+ _mr_bs->clear(MemRegion(r->bottom(), r->end())); + return false; + } +}; + + +class PostMCRemSetInvalidateClosure: public HeapRegionClosure { + ModRefBarrierSet* _mr_bs; +public: + PostMCRemSetInvalidateClosure(ModRefBarrierSet* mr_bs) : _mr_bs(mr_bs) {} + bool doHeapRegion(HeapRegion* r) { + if (r->continuesHumongous()) return false; + if (r->used_region().word_size() != 0) { + _mr_bs->invalidate(r->used_region(), true /*whole heap*/); + } + return false; + } +}; + +void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs, + size_t word_size) { + ResourceMark rm; + + if (full && DisableExplicitGC) { + gclog_or_tty->print("\n\n\nDisabling Explicit GC\n\n\n"); + return; + } + + assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint"); + assert(Thread::current() == VMThread::vm_thread(), "should be in vm thread"); + + if (GC_locker::is_active()) { + return; // GC is disabled (e.g. JNI GetXXXCritical operation) + } + + { + IsGCActiveMark x; + + // Timing + gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps); + TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); + TraceTime t(full ? "Full GC (System.gc())" : "Full GC", PrintGC, true, gclog_or_tty); + + double start = os::elapsedTime(); + GCOverheadReporter::recordSTWStart(start); + g1_policy()->record_full_collection_start(); + + gc_prologue(true); + increment_total_collections(); + + size_t g1h_prev_used = used(); + assert(used() == recalculate_used(), "Should be equal"); + + if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) { + HandleMark hm; // Discard invalid handles created during verification + prepare_for_verify(); + gclog_or_tty->print(" VerifyBeforeGC:"); + Universe::verify(true); + } + assert(regions_accounted_for(), "Region leakage!"); + + COMPILER2_PRESENT(DerivedPointerTable::clear()); + + // We want to discover references, but not process them yet. + // This mode is disabled in + // instanceRefKlass::process_discovered_references if the + // generation does some collection work, or + // instanceRefKlass::enqueue_discovered_references if the + // generation returns without doing any work. + ref_processor()->disable_discovery(); + ref_processor()->abandon_partial_discovery(); + ref_processor()->verify_no_references_recorded(); + + // Abandon current iterations of concurrent marking and concurrent + // refinement, if any are in progress. + concurrent_mark()->abort(); + + // Make sure we'll choose a new allocation region afterwards. + abandon_cur_alloc_region(); + assert(_cur_alloc_region == NULL, "Invariant."); + g1_rem_set()->as_HRInto_G1RemSet()->cleanupHRRS(); + tear_down_region_lists(); + set_used_regions_to_need_zero_fill(); + if (g1_policy()->in_young_gc_mode()) { + empty_young_list(); + g1_policy()->set_full_young_gcs(true); + } + + // Temporarily make reference _discovery_ single threaded (non-MT). + ReferenceProcessorMTMutator rp_disc_ser(ref_processor(), false); + + // Temporarily make refs discovery atomic + ReferenceProcessorAtomicMutator rp_disc_atomic(ref_processor(), true); + + // Temporarily clear _is_alive_non_header + ReferenceProcessorIsAliveMutator rp_is_alive_null(ref_processor(), NULL); + + ref_processor()->enable_discovery(); + + // Do collection work + { + HandleMark hm; // Discard invalid handles created during gc + G1MarkSweep::invoke_at_safepoint(ref_processor(), clear_all_soft_refs); + } + // Because freeing humongous regions may have added some unclean + // regions, it is necessary to tear down again before rebuilding. 
+ tear_down_region_lists(); + rebuild_region_lists(); + + _summary_bytes_used = recalculate_used(); + + ref_processor()->enqueue_discovered_references(); + + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); + + if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) { + HandleMark hm; // Discard invalid handles created during verification + gclog_or_tty->print(" VerifyAfterGC:"); + Universe::verify(false); + } + NOT_PRODUCT(ref_processor()->verify_no_references_recorded()); + + reset_gc_time_stamp(); + // Since everything potentially moved, we will clear all remembered + // sets, and clear all cards. Later we will also cards in the used + // portion of the heap after the resizing (which could be a shrinking.) + // We will also reset the GC time stamps of the regions. + PostMCRemSetClearClosure rs_clear(mr_bs()); + heap_region_iterate(&rs_clear); + + // Resize the heap if necessary. + resize_if_necessary_after_full_collection(full ? 0 : word_size); + + // Since everything potentially moved, we will clear all remembered + // sets, but also dirty all cards corresponding to used regions. + PostMCRemSetInvalidateClosure rs_invalidate(mr_bs()); + heap_region_iterate(&rs_invalidate); + if (_cg1r->use_cache()) { + _cg1r->clear_and_record_card_counts(); + _cg1r->clear_hot_cache(); + } + + if (PrintGC) { + print_size_transition(gclog_or_tty, g1h_prev_used, used(), capacity()); + } + + if (true) { // FIXME + // Ask the permanent generation to adjust size for full collections + perm()->compute_new_size(); + } + + double end = os::elapsedTime(); + GCOverheadReporter::recordSTWEnd(end); + g1_policy()->record_full_collection_end(); + + gc_epilogue(true); + + // Abandon concurrent refinement. This must happen last: in the + // dirty-card logging system, some cards may be dirty by weak-ref + // processing, and may be enqueued. But the whole card table is + // dirtied, so this should abandon those logs, and set "do_traversal" + // to true. + concurrent_g1_refine()->set_pya_restart(); + + assert(regions_accounted_for(), "Region leakage!"); + } + + if (g1_policy()->in_young_gc_mode()) { + _young_list->reset_sampled_info(); + assert( check_young_list_empty(false, false), + "young list should be empty at this point"); + } +} + +void G1CollectedHeap::do_full_collection(bool clear_all_soft_refs) { + do_collection(true, clear_all_soft_refs, 0); +} + +// This code is mostly copied from TenuredGeneration. +void +G1CollectedHeap:: +resize_if_necessary_after_full_collection(size_t word_size) { + assert(MinHeapFreeRatio <= MaxHeapFreeRatio, "sanity check"); + + // Include the current allocation, if any, and bytes that will be + // pre-allocated to support collections, as "used". + const size_t used_after_gc = used(); + const size_t capacity_after_gc = capacity(); + const size_t free_after_gc = capacity_after_gc - used_after_gc; + + // We don't have floating point command-line arguments + const double minimum_free_percentage = (double) MinHeapFreeRatio / 100; + const double maximum_used_percentage = 1.0 - minimum_free_percentage; + const double maximum_free_percentage = (double) MaxHeapFreeRatio / 100; + const double minimum_used_percentage = 1.0 - maximum_free_percentage; + + size_t minimum_desired_capacity = (size_t) (used_after_gc / maximum_used_percentage); + size_t maximum_desired_capacity = (size_t) (used_after_gc / minimum_used_percentage); + + // Don't shrink less than the initial size. 
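// A worked, standalone sketch of the ratio arithmetic above, with made-up
// numbers: if 600 MB are in use after the full GC and MinHeapFreeRatio is 40,
// the heap must be at least 600 / (1 - 0.40) = 1000 MB so that 40% of it is
// free; with MaxHeapFreeRatio 70 it should be at most 600 / (1 - 0.70) =
// 2000 MB so that no more than 70% is free.  The clamping against the
// initial heap size happens just below.
#include <cstddef>

size_t min_desired_capacity(size_t used_after_gc, unsigned min_heap_free_ratio) {
  double maximum_used_percentage = 1.0 - (double)min_heap_free_ratio / 100.0;
  return (size_t)((double)used_after_gc / maximum_used_percentage);
}
// e.g. min_desired_capacity(600u << 20, 40) is roughly 1000 MB.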
+ minimum_desired_capacity = + MAX2(minimum_desired_capacity, + collector_policy()->initial_heap_byte_size()); + maximum_desired_capacity = + MAX2(maximum_desired_capacity, + collector_policy()->initial_heap_byte_size()); + + // We are failing here because minimum_desired_capacity is + assert(used_after_gc <= minimum_desired_capacity, "sanity check"); + assert(minimum_desired_capacity <= maximum_desired_capacity, "sanity check"); + + if (PrintGC && Verbose) { + const double free_percentage = ((double)free_after_gc) / capacity(); + gclog_or_tty->print_cr("Computing new size after full GC "); + gclog_or_tty->print_cr(" " + " minimum_free_percentage: %6.2f", + minimum_free_percentage); + gclog_or_tty->print_cr(" " + " maximum_free_percentage: %6.2f", + maximum_free_percentage); + gclog_or_tty->print_cr(" " + " capacity: %6.1fK" + " minimum_desired_capacity: %6.1fK" + " maximum_desired_capacity: %6.1fK", + capacity() / (double) K, + minimum_desired_capacity / (double) K, + maximum_desired_capacity / (double) K); + gclog_or_tty->print_cr(" " + " free_after_gc : %6.1fK" + " used_after_gc : %6.1fK", + free_after_gc / (double) K, + used_after_gc / (double) K); + gclog_or_tty->print_cr(" " + " free_percentage: %6.2f", + free_percentage); + } + if (capacity() < minimum_desired_capacity) { + // Don't expand unless it's significant + size_t expand_bytes = minimum_desired_capacity - capacity_after_gc; + expand(expand_bytes); + if (PrintGC && Verbose) { + gclog_or_tty->print_cr(" expanding:" + " minimum_desired_capacity: %6.1fK" + " expand_bytes: %6.1fK", + minimum_desired_capacity / (double) K, + expand_bytes / (double) K); + } + + // No expansion, now see if we want to shrink + } else if (capacity() > maximum_desired_capacity) { + // Capacity too large, compute shrinking size + size_t shrink_bytes = capacity_after_gc - maximum_desired_capacity; + shrink(shrink_bytes); + if (PrintGC && Verbose) { + gclog_or_tty->print_cr(" " + " shrinking:" + " initSize: %.1fK" + " maximum_desired_capacity: %.1fK", + collector_policy()->initial_heap_byte_size() / (double) K, + maximum_desired_capacity / (double) K); + gclog_or_tty->print_cr(" " + " shrink_bytes: %.1fK", + shrink_bytes / (double) K); + } + } +} + + +HeapWord* +G1CollectedHeap::satisfy_failed_allocation(size_t word_size) { + HeapWord* result = NULL; + + // In a G1 heap, we're supposed to keep allocation from failing by + // incremental pauses. Therefore, at least for now, we'll favor + // expansion over collection. (This might change in the future if we can + // do something smarter than full collection to satisfy a failed alloc.) + + result = expand_and_allocate(word_size); + if (result != NULL) { + assert(is_in(result), "result not in heap"); + return result; + } + + // OK, I guess we have to try collection. + + do_collection(false, false, word_size); + + result = attempt_allocation(word_size, /*permit_collection_pause*/false); + + if (result != NULL) { + assert(is_in(result), "result not in heap"); + return result; + } + + // Try collecting soft references. + do_collection(false, true, word_size); + result = attempt_allocation(word_size, /*permit_collection_pause*/false); + if (result != NULL) { + assert(is_in(result), "result not in heap"); + return result; + } + + // What else? We might try synchronous finalization later. If the total + // space available is large enough for the allocation, then a more + // complete compaction phase than we've tried so far might be + // appropriate. 
+ return NULL; +} + +// Attempting to expand the heap sufficiently +// to support an allocation of the given "word_size". If +// successful, perform the allocation and return the address of the +// allocated block, or else "NULL". + +HeapWord* G1CollectedHeap::expand_and_allocate(size_t word_size) { + size_t expand_bytes = word_size * HeapWordSize; + if (expand_bytes < MinHeapDeltaBytes) { + expand_bytes = MinHeapDeltaBytes; + } + expand(expand_bytes); + assert(regions_accounted_for(), "Region leakage!"); + HeapWord* result = attempt_allocation(word_size, false /* permit_collection_pause */); + return result; +} + +size_t G1CollectedHeap::free_region_if_totally_empty(HeapRegion* hr) { + size_t pre_used = 0; + size_t cleared_h_regions = 0; + size_t freed_regions = 0; + UncleanRegionList local_list; + free_region_if_totally_empty_work(hr, pre_used, cleared_h_regions, + freed_regions, &local_list); + + finish_free_region_work(pre_used, cleared_h_regions, freed_regions, + &local_list); + return pre_used; +} + +void +G1CollectedHeap::free_region_if_totally_empty_work(HeapRegion* hr, + size_t& pre_used, + size_t& cleared_h, + size_t& freed_regions, + UncleanRegionList* list, + bool par) { + assert(!hr->continuesHumongous(), "should have filtered these out"); + size_t res = 0; + if (!hr->popular() && hr->used() > 0 && hr->garbage_bytes() == hr->used()) { + if (!hr->is_young()) { + if (G1PolicyVerbose > 0) + gclog_or_tty->print_cr("Freeing empty region "PTR_FORMAT "(" SIZE_FORMAT " bytes)" + " during cleanup", hr, hr->used()); + free_region_work(hr, pre_used, cleared_h, freed_regions, list, par); + } + } +} + +// FIXME: both this and shrink could probably be more efficient by +// doing one "VirtualSpace::expand_by" call rather than several. +void G1CollectedHeap::expand(size_t expand_bytes) { + size_t old_mem_size = _g1_storage.committed_size(); + // We expand by a minimum of 1K. + expand_bytes = MAX2(expand_bytes, (size_t)K); + size_t aligned_expand_bytes = + ReservedSpace::page_align_size_up(expand_bytes); + aligned_expand_bytes = align_size_up(aligned_expand_bytes, + HeapRegion::GrainBytes); + expand_bytes = aligned_expand_bytes; + while (expand_bytes > 0) { + HeapWord* base = (HeapWord*)_g1_storage.high(); + // Commit more storage. + bool successful = _g1_storage.expand_by(HeapRegion::GrainBytes); + if (!successful) { + expand_bytes = 0; + } else { + expand_bytes -= HeapRegion::GrainBytes; + // Expand the committed region. + HeapWord* high = (HeapWord*) _g1_storage.high(); + _g1_committed.set_end(high); + // Create a new HeapRegion. + MemRegion mr(base, high); + bool is_zeroed = !_g1_max_committed.contains(base); + HeapRegion* hr = new HeapRegion(_bot_shared, mr, is_zeroed); + + // Now update max_committed if necessary. + _g1_max_committed.set_end(MAX2(_g1_max_committed.end(), high)); + + // Add it to the HeapRegionSeq. + _hrs->insert(hr); + // Set the zero-fill state, according to whether it's already + // zeroed. + { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + if (is_zeroed) { + hr->set_zero_fill_complete(); + put_free_region_on_list_locked(hr); + } else { + hr->set_zero_fill_needed(); + put_region_on_unclean_list_locked(hr); + } + } + _free_regions++; + // And we used up an expansion region to create it. + _expansion_regions--; + // Tell the cardtable about it. + Universe::heap()->barrier_set()->resize_covered_region(_g1_committed); + // And the offset table as well. 
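// A simplified, standalone sketch of the align_size_up() used at the top of
// expand(): rounding a byte count up to the next multiple of a power-of-two
// alignment with mask arithmetic.
#include <cassert>
#include <cstddef>

size_t align_up(size_t bytes, size_t alignment) {  // alignment: a power of two
  assert(alignment != 0 && (alignment & (alignment - 1)) == 0);
  return (bytes + alignment - 1) & ~(alignment - 1);
}
// e.g. align_up(1, 4096) == 4096, align_up(4096, 4096) == 4096 and
// align_up(4097, 4096) == 8192.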
+ _bot_shared->resize(_g1_committed.word_size()); + } + } + if (Verbose && PrintGC) { + size_t new_mem_size = _g1_storage.committed_size(); + gclog_or_tty->print_cr("Expanding garbage-first heap from %ldK by %ldK to %ldK", + old_mem_size/K, aligned_expand_bytes/K, + new_mem_size/K); + } +} + +void G1CollectedHeap::shrink_helper(size_t shrink_bytes) +{ + size_t old_mem_size = _g1_storage.committed_size(); + size_t aligned_shrink_bytes = + ReservedSpace::page_align_size_down(shrink_bytes); + aligned_shrink_bytes = align_size_down(aligned_shrink_bytes, + HeapRegion::GrainBytes); + size_t num_regions_deleted = 0; + MemRegion mr = _hrs->shrink_by(aligned_shrink_bytes, num_regions_deleted); + + assert(mr.end() == (HeapWord*)_g1_storage.high(), "Bad shrink!"); + if (mr.byte_size() > 0) + _g1_storage.shrink_by(mr.byte_size()); + assert(mr.start() == (HeapWord*)_g1_storage.high(), "Bad shrink!"); + + _g1_committed.set_end(mr.start()); + _free_regions -= num_regions_deleted; + _expansion_regions += num_regions_deleted; + + // Tell the cardtable about it. + Universe::heap()->barrier_set()->resize_covered_region(_g1_committed); + + // And the offset table as well. + _bot_shared->resize(_g1_committed.word_size()); + + HeapRegionRemSet::shrink_heap(n_regions()); + + if (Verbose && PrintGC) { + size_t new_mem_size = _g1_storage.committed_size(); + gclog_or_tty->print_cr("Shrinking garbage-first heap from %ldK by %ldK to %ldK", + old_mem_size/K, aligned_shrink_bytes/K, + new_mem_size/K); + } +} + +void G1CollectedHeap::shrink(size_t shrink_bytes) { + release_gc_alloc_regions(); + tear_down_region_lists(); // We will rebuild them in a moment. + shrink_helper(shrink_bytes); + rebuild_region_lists(); +} + +// Public methods. + +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list +#endif // _MSC_VER + + +G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) : + SharedHeap(policy_), + _g1_policy(policy_), + _ref_processor(NULL), + _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)), + _bot_shared(NULL), + _par_alloc_during_gc_lock(Mutex::leaf, "par alloc during GC lock"), + _objs_with_preserved_marks(NULL), _preserved_marks_of_objs(NULL), + _evac_failure_scan_stack(NULL) , + _mark_in_progress(false), + _cg1r(NULL), _czft(NULL), _summary_bytes_used(0), + _cur_alloc_region(NULL), + _refine_cte_cl(NULL), + _free_region_list(NULL), _free_region_list_size(0), + _free_regions(0), + _popular_object_boundary(NULL), + _cur_pop_hr_index(0), + _popular_regions_to_be_evacuated(NULL), + _pop_obj_rc_at_copy(), + _full_collection(false), + _unclean_region_list(), + _unclean_regions_coming(false), + _young_list(new YoungList(this)), + _gc_time_stamp(0), + _surviving_young_words(NULL) +{ + _g1h = this; // To catch bugs. 
+ if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) { + vm_exit_during_initialization("Failed necessary allocation."); + } + int n_queues = MAX2((int)ParallelGCThreads, 1); + _task_queues = new RefToScanQueueSet(n_queues); + + int n_rem_sets = HeapRegionRemSet::num_par_rem_sets(); + assert(n_rem_sets > 0, "Invariant."); + + HeapRegionRemSetIterator** iter_arr = + NEW_C_HEAP_ARRAY(HeapRegionRemSetIterator*, n_queues); + for (int i = 0; i < n_queues; i++) { + iter_arr[i] = new HeapRegionRemSetIterator(); + } + _rem_set_iterator = iter_arr; + + for (int i = 0; i < n_queues; i++) { + RefToScanQueue* q = new RefToScanQueue(); + q->initialize(); + _task_queues->register_queue(i, q); + } + + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + _gc_alloc_regions[ap] = NULL; + _gc_alloc_region_counts[ap] = 0; + } + guarantee(_task_queues != NULL, "task_queues allocation failure."); +} + +jint G1CollectedHeap::initialize() { + os::enable_vtime(); + + // Necessary to satisfy locking discipline assertions. + + MutexLocker x(Heap_lock); + + // While there are no constraints in the GC code that HeapWordSize + // be any particular value, there are multiple other areas in the + // system which believe this to be true (e.g. oop->object_size in some + // cases incorrectly returns the size in wordSize units rather than + // HeapWordSize). + guarantee(HeapWordSize == wordSize, "HeapWordSize must equal wordSize"); + + size_t init_byte_size = collector_policy()->initial_heap_byte_size(); + size_t max_byte_size = collector_policy()->max_heap_byte_size(); + + // Ensure that the sizes are properly aligned. + Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap"); + Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap"); + + // We allocate this in any case, but only do no work if the command line + // param is off. + _cg1r = new ConcurrentG1Refine(); + + // Reserve the maximum. + PermanentGenerationSpec* pgs = collector_policy()->permanent_generation(); + // Includes the perm-gen. + ReservedSpace heap_rs(max_byte_size + pgs->max_size(), + HeapRegion::GrainBytes, + false /*ism*/); + + if (!heap_rs.is_reserved()) { + vm_exit_during_initialization("Could not reserve enough space for object heap"); + return JNI_ENOMEM; + } + + // It is important to do this in a way such that concurrent readers can't + // temporarily think somethings in the heap. (I've actually seen this + // happen in asserts: DLD.) + _reserved.set_word_size(0); + _reserved.set_start((HeapWord*)heap_rs.base()); + _reserved.set_end((HeapWord*)(heap_rs.base() + heap_rs.size())); + + _expansion_regions = max_byte_size/HeapRegion::GrainBytes; + + _num_humongous_regions = 0; + + // Create the gen rem set (and barrier set) for the entire reserved region. + _rem_set = collector_policy()->create_rem_set(_reserved, 2); + set_barrier_set(rem_set()->bs()); + if (barrier_set()->is_a(BarrierSet::ModRef)) { + _mr_bs = (ModRefBarrierSet*)_barrier_set; + } else { + vm_exit_during_initialization("G1 requires a mod ref bs."); + return JNI_ENOMEM; + } + + // Also create a G1 rem set. + if (G1UseHRIntoRS) { + if (mr_bs()->is_a(BarrierSet::CardTableModRef)) { + _g1_rem_set = new HRInto_G1RemSet(this, (CardTableModRefBS*)mr_bs()); + } else { + vm_exit_during_initialization("G1 requires a cardtable mod ref bs."); + return JNI_ENOMEM; + } + } else { + _g1_rem_set = new StupidG1RemSet(this); + } + + // Carve out the G1 part of the heap. 
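// A simplified, standalone sketch of the split performed next: one contiguous
// reservation of (max_byte_size + perm gen size) bytes is divided into a
// leading part for the G1 heap and a trailing part for the permanent
// generation.  Range is a hypothetical stand-in for ReservedSpace.
#include <cstddef>

struct Range { char* base; size_t size; };

Range first_part(const Range& r, size_t bytes) {   // leading [base, base + bytes)
  Range res = { r.base, bytes };
  return res;
}

Range last_part(const Range& r, size_t offset) {   // trailing [base + offset, base + size)
  Range res = { r.base + offset, r.size - offset };
  return res;
}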
+ + ReservedSpace g1_rs = heap_rs.first_part(max_byte_size); + _g1_reserved = MemRegion((HeapWord*)g1_rs.base(), + g1_rs.size()/HeapWordSize); + ReservedSpace perm_gen_rs = heap_rs.last_part(max_byte_size); + + _perm_gen = pgs->init(perm_gen_rs, pgs->init_size(), rem_set()); + + _g1_storage.initialize(g1_rs, 0); + _g1_committed = MemRegion((HeapWord*)_g1_storage.low(), (size_t) 0); + _g1_max_committed = _g1_committed; + _hrs = new HeapRegionSeq(); + guarantee(_hrs != NULL, "Couldn't allocate HeapRegionSeq"); + guarantee(_cur_alloc_region == NULL, "from constructor"); + + _bot_shared = new G1BlockOffsetSharedArray(_reserved, + heap_word_size(init_byte_size)); + + _g1h = this; + + // Create the ConcurrentMark data structure and thread. + // (Must do this late, so that "max_regions" is defined.) + _cm = new ConcurrentMark(heap_rs, (int) max_regions()); + _cmThread = _cm->cmThread(); + + // ...and the concurrent zero-fill thread, if necessary. + if (G1ConcZeroFill) { + _czft = new ConcurrentZFThread(); + } + + + + // Allocate the popular regions; take them off free lists. + size_t pop_byte_size = G1NumPopularRegions * HeapRegion::GrainBytes; + expand(pop_byte_size); + _popular_object_boundary = + _g1_reserved.start() + (G1NumPopularRegions * HeapRegion::GrainWords); + for (int i = 0; i < G1NumPopularRegions; i++) { + HeapRegion* hr = newAllocRegion(HeapRegion::GrainWords); + // assert(hr != NULL && hr->bottom() < _popular_object_boundary, + // "Should be enough, and all should be below boundary."); + hr->set_popular(true); + } + assert(_cur_pop_hr_index == 0, "Start allocating at the first region."); + + // Initialize the from_card cache structure of HeapRegionRemSet. + HeapRegionRemSet::init_heap(max_regions()); + + // Now expand into the rest of the initial heap size. + expand(init_byte_size - pop_byte_size); + + // Perform any initialization actions delegated to the policy. + g1_policy()->init(); + + g1_policy()->note_start_of_mark_thread(); + + _refine_cte_cl = + new RefineCardTableEntryClosure(ConcurrentG1RefineThread::sts(), + g1_rem_set(), + concurrent_g1_refine()); + JavaThread::dirty_card_queue_set().set_closure(_refine_cte_cl); + + JavaThread::satb_mark_queue_set().initialize(SATB_Q_CBL_mon, + SATB_Q_FL_lock, + 0, + Shared_SATB_Q_lock); + if (G1RSBarrierUseQueue) { + JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, + DirtyCardQ_FL_lock, + G1DirtyCardQueueMax, + Shared_DirtyCardQ_lock); + } + // In case we're keeping closure specialization stats, initialize those + // counts and that mechanism. + SpecializationStats::clear(); + + _gc_alloc_region_list = NULL; + + // Do later initialization work for concurrent refinement. + _cg1r->init(); + + const char* group_names[] = { "CR", "ZF", "CM", "CL" }; + GCOverheadReporter::initGCOverheadReporter(4, group_names); + + return JNI_OK; +} + +void G1CollectedHeap::ref_processing_init() { + SharedHeap::ref_processing_init(); + MemRegion mr = reserved_region(); + _ref_processor = ReferenceProcessor::create_ref_processor( + mr, // span + false, // Reference discovery is not atomic + // (though it shouldn't matter here.) + true, // mt_discovery + NULL, // is alive closure: need to fill this in for efficiency + ParallelGCThreads, + ParallelRefProcEnabled, + true); // Setting next fields of discovered + // lists requires a barrier. 
+} + +size_t G1CollectedHeap::capacity() const { + return _g1_committed.byte_size(); +} + +void G1CollectedHeap::iterate_dirty_card_closure(bool concurrent, + int worker_i) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + int n_completed_buffers = 0; + while (dcqs.apply_closure_to_completed_buffer(worker_i, 0, true)) { + n_completed_buffers++; + } + g1_policy()->record_update_rs_processed_buffers(worker_i, + (double) n_completed_buffers); + dcqs.clear_n_completed_buffers(); + // Finish up the queue... + if (worker_i == 0) concurrent_g1_refine()->clean_up_cache(worker_i, + g1_rem_set()); + assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!"); +} + + +// Computes the sum of the storage used by the various regions. + +size_t G1CollectedHeap::used() const { + assert(Heap_lock->owner() != NULL, + "Should be owned on this thread's behalf."); + size_t result = _summary_bytes_used; + if (_cur_alloc_region != NULL) + result += _cur_alloc_region->used(); + return result; +} + +class SumUsedClosure: public HeapRegionClosure { + size_t _used; +public: + SumUsedClosure() : _used(0) {} + bool doHeapRegion(HeapRegion* r) { + if (!r->continuesHumongous()) { + _used += r->used(); + } + return false; + } + size_t result() { return _used; } +}; + +size_t G1CollectedHeap::recalculate_used() const { + SumUsedClosure blk; + _hrs->iterate(&blk); + return blk.result(); +} + +#ifndef PRODUCT +class SumUsedRegionsClosure: public HeapRegionClosure { + size_t _num; +public: + // _num is set to 1 to account for the popular region + SumUsedRegionsClosure() : _num(G1NumPopularRegions) {} + bool doHeapRegion(HeapRegion* r) { + if (r->continuesHumongous() || r->used() > 0 || r->is_gc_alloc_region()) { + _num += 1; + } + return false; + } + size_t result() { return _num; } +}; + +size_t G1CollectedHeap::recalculate_used_regions() const { + SumUsedRegionsClosure blk; + _hrs->iterate(&blk); + return blk.result(); +} +#endif // PRODUCT + +size_t G1CollectedHeap::unsafe_max_alloc() { + if (_free_regions > 0) return HeapRegion::GrainBytes; + // otherwise, is there space in the current allocation region? + + // We need to store the current allocation region in a local variable + // here. The problem is that this method doesn't take any locks and + // there may be other threads which overwrite the current allocation + // region field. attempt_allocation(), for example, sets it to NULL + // and this can happen *after* the NULL check here but before the call + // to free(), resulting in a SIGSEGV. Note that this doesn't appear + // to be a problem in the optimized build, since the two loads of the + // current allocation region field are optimized away. + HeapRegion* car = _cur_alloc_region; + + // FIXME: should iterate over all regions? 
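// A simplified, standalone sketch of the pattern described in the comment
// above: read the shared field exactly once into a local, then perform both
// the NULL check and the use on that local, so a concurrent writer clearing
// the field in between cannot turn the check-then-use into a crash.  Names
// are hypothetical.
#include <cstddef>

struct Space { size_t free_bytes; };

size_t unsafe_free_estimate(Space* volatile* shared_field) {
  Space* snapshot = *shared_field;   // single read of the shared pointer
  if (snapshot == NULL) {
    return 0;                        // it may already have been cleared
  }
  return snapshot->free_bytes;       // the snapshot we took remains usable
}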
+ if (car == NULL) { + return 0; + } + return car->free(); +} + +void G1CollectedHeap::collect(GCCause::Cause cause) { + // The caller doesn't have the Heap_lock + assert(!Heap_lock->owned_by_self(), "this thread should not own the Heap_lock"); + MutexLocker ml(Heap_lock); + collect_locked(cause); +} + +void G1CollectedHeap::collect_as_vm_thread(GCCause::Cause cause) { + assert(Thread::current()->is_VM_thread(), "Precondition#1"); + assert(Heap_lock->is_locked(), "Precondition#2"); + GCCauseSetter gcs(this, cause); + switch (cause) { + case GCCause::_heap_inspection: + case GCCause::_heap_dump: { + HandleMark hm; + do_full_collection(false); // don't clear all soft refs + break; + } + default: // XXX FIX ME + ShouldNotReachHere(); // Unexpected use of this function + } +} + + +void G1CollectedHeap::collect_locked(GCCause::Cause cause) { + // Don't want to do a GC until cleanup is completed. + wait_for_cleanup_complete(); + + // Read the GC count while holding the Heap_lock + int gc_count_before = SharedHeap::heap()->total_collections(); + { + MutexUnlocker mu(Heap_lock); // give up heap lock, execute gets it back + VM_G1CollectFull op(gc_count_before, cause); + VMThread::execute(&op); + } +} + +bool G1CollectedHeap::is_in(const void* p) const { + if (_g1_committed.contains(p)) { + HeapRegion* hr = _hrs->addr_to_region(p); + return hr->is_in(p); + } else { + return _perm_gen->as_gen()->is_in(p); + } +} + +// Iteration functions. + +// Iterates an OopClosure over all ref-containing fields of objects +// within a HeapRegion. + +class IterateOopClosureRegionClosure: public HeapRegionClosure { + MemRegion _mr; + OopClosure* _cl; +public: + IterateOopClosureRegionClosure(MemRegion mr, OopClosure* cl) + : _mr(mr), _cl(cl) {} + bool doHeapRegion(HeapRegion* r) { + if (! r->continuesHumongous()) { + r->oop_iterate(_cl); + } + return false; + } +}; + +void G1CollectedHeap::oop_iterate(OopClosure* cl) { + IterateOopClosureRegionClosure blk(_g1_committed, cl); + _hrs->iterate(&blk); +} + +void G1CollectedHeap::oop_iterate(MemRegion mr, OopClosure* cl) { + IterateOopClosureRegionClosure blk(mr, cl); + _hrs->iterate(&blk); +} + +// Iterates an ObjectClosure over all objects within a HeapRegion. + +class IterateObjectClosureRegionClosure: public HeapRegionClosure { + ObjectClosure* _cl; +public: + IterateObjectClosureRegionClosure(ObjectClosure* cl) : _cl(cl) {} + bool doHeapRegion(HeapRegion* r) { + if (! r->continuesHumongous()) { + r->object_iterate(_cl); + } + return false; + } +}; + +void G1CollectedHeap::object_iterate(ObjectClosure* cl) { + IterateObjectClosureRegionClosure blk(cl); + _hrs->iterate(&blk); +} + +void G1CollectedHeap::object_iterate_since_last_GC(ObjectClosure* cl) { + // FIXME: is this right? + guarantee(false, "object_iterate_since_last_GC not supported by G1 heap"); +} + +// Calls a SpaceClosure on a HeapRegion. 
+ +class SpaceClosureRegionClosure: public HeapRegionClosure { + SpaceClosure* _cl; +public: + SpaceClosureRegionClosure(SpaceClosure* cl) : _cl(cl) {} + bool doHeapRegion(HeapRegion* r) { + _cl->do_space(r); + return false; + } +}; + +void G1CollectedHeap::space_iterate(SpaceClosure* cl) { + SpaceClosureRegionClosure blk(cl); + _hrs->iterate(&blk); +} + +void G1CollectedHeap::heap_region_iterate(HeapRegionClosure* cl) { + _hrs->iterate(cl); +} + +void G1CollectedHeap::heap_region_iterate_from(HeapRegion* r, + HeapRegionClosure* cl) { + _hrs->iterate_from(r, cl); +} + +void +G1CollectedHeap::heap_region_iterate_from(int idx, HeapRegionClosure* cl) { + _hrs->iterate_from(idx, cl); +} + +HeapRegion* G1CollectedHeap::region_at(size_t idx) { return _hrs->at(idx); } + +const int OverpartitionFactor = 4; +void +G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl, + int worker, + jint claim_value) { + // We break up the heap regions into blocks of size ParallelGCThreads (to + // decrease iteration costs). + const size_t nregions = n_regions(); + const size_t n_thrds = (ParallelGCThreads > 0 ? ParallelGCThreads : 1); + const size_t partitions = n_thrds * OverpartitionFactor; + const size_t BlkSize = MAX2(nregions/partitions, (size_t)1); + const size_t n_blocks = (nregions + BlkSize - 1)/BlkSize; + assert(ParallelGCThreads > 0 || worker == 0, "Precondition"); + const int init_idx = (int) (n_blocks/n_thrds * worker); + for (size_t blk = 0; blk < n_blocks; blk++) { + size_t idx = init_idx + blk; + if (idx >= n_blocks) idx = idx - n_blocks; + size_t reg_idx = idx * BlkSize; + assert(reg_idx < nregions, "Because we rounded blk up."); + HeapRegion* r = region_at(reg_idx); + if (r->claimHeapRegion(claim_value)) { + for (size_t j = 0; j < BlkSize; j++) { + size_t reg_idx2 = reg_idx + j; + if (reg_idx2 == nregions) break; + HeapRegion* r2 = region_at(reg_idx2); + if (j > 0) r2->set_claim_value(claim_value); + bool res = cl->doHeapRegion(r2); + guarantee(!res, "Should not abort."); + } + } + } +} + +void G1CollectedHeap::collection_set_iterate(HeapRegionClosure* cl) { + HeapRegion* r = g1_policy()->collection_set(); + while (r != NULL) { + HeapRegion* next = r->next_in_collection_set(); + if (cl->doHeapRegion(r)) { + cl->incomplete(); + return; + } + r = next; + } +} + +void G1CollectedHeap::collection_set_iterate_from(HeapRegion* r, + HeapRegionClosure *cl) { + assert(r->in_collection_set(), + "Start region must be a member of the collection set."); + HeapRegion* cur = r; + while (cur != NULL) { + HeapRegion* next = cur->next_in_collection_set(); + if (cl->doHeapRegion(cur) && false) { + cl->incomplete(); + return; + } + cur = next; + } + cur = g1_policy()->collection_set(); + while (cur != r) { + HeapRegion* next = cur->next_in_collection_set(); + if (cl->doHeapRegion(cur) && false) { + cl->incomplete(); + return; + } + cur = next; + } +} + +CompactibleSpace* G1CollectedHeap::first_compactible_space() { + return _hrs->length() > 0 ? 
_hrs->at(0) : NULL;
+}
+
+
+Space* G1CollectedHeap::space_containing(const void* addr) const {
+  Space* res = heap_region_containing(addr);
+  if (res == NULL)
+    res = perm_gen()->space_containing(addr);
+  return res;
+}
+
+HeapWord* G1CollectedHeap::block_start(const void* addr) const {
+  Space* sp = space_containing(addr);
+  if (sp != NULL) {
+    return sp->block_start(addr);
+  }
+  return NULL;
+}
+
+size_t G1CollectedHeap::block_size(const HeapWord* addr) const {
+  Space* sp = space_containing(addr);
+  assert(sp != NULL, "block_size of address outside of heap");
+  return sp->block_size(addr);
+}
+
+bool G1CollectedHeap::block_is_obj(const HeapWord* addr) const {
+  Space* sp = space_containing(addr);
+  return sp->block_is_obj(addr);
+}
+
+bool G1CollectedHeap::supports_tlab_allocation() const {
+  return true;
+}
+
+size_t G1CollectedHeap::tlab_capacity(Thread* ignored) const {
+  return HeapRegion::GrainBytes;
+}
+
+size_t G1CollectedHeap::unsafe_max_tlab_alloc(Thread* ignored) const {
+  // Return the remaining space in the cur alloc region, but not less than
+  // the min TLAB size.
+  // Also, no more than half the region size, since we can't allow tlabs to
+  // grow big enough to accommodate humongous objects.
+
+  // We need to store it locally, since it might change between when we
+  // test for NULL and when we use it later.
+  ContiguousSpace* cur_alloc_space = _cur_alloc_region;
+  if (cur_alloc_space == NULL) {
+    return HeapRegion::GrainBytes/2;
+  } else {
+    return MAX2(MIN2(cur_alloc_space->free(),
+                     (size_t)(HeapRegion::GrainBytes/2)),
+                (size_t)MinTLABSize);
+  }
+}
+
+HeapWord* G1CollectedHeap::allocate_new_tlab(size_t size) {
+  bool dummy;
+  return G1CollectedHeap::mem_allocate(size, false, true, &dummy);
+}
+
+bool G1CollectedHeap::allocs_are_zero_filled() {
+  return false;
+}
+
+size_t G1CollectedHeap::large_typearray_limit() {
+  // FIXME
+  return HeapRegion::GrainBytes/HeapWordSize;
+}
+
+size_t G1CollectedHeap::max_capacity() const {
+  return _g1_committed.byte_size();
+}
+
+jlong G1CollectedHeap::millis_since_last_gc() {
+  // assert(false, "NYI");
+  return 0;
+}
+
+
+void G1CollectedHeap::prepare_for_verify() {
+  if (SafepointSynchronize::is_at_safepoint() || !
UseTLAB) { + ensure_parsability(false); + } + g1_rem_set()->prepare_for_verify(); +} + +class VerifyLivenessOopClosure: public OopClosure { + G1CollectedHeap* g1h; +public: + VerifyLivenessOopClosure(G1CollectedHeap* _g1h) { + g1h = _g1h; + } + void do_oop(narrowOop *p) { + guarantee(false, "NYI"); + } + void do_oop(oop *p) { + oop obj = *p; + assert(obj == NULL || !g1h->is_obj_dead(obj), + "Dead object referenced by a not dead object"); + } +}; + +class VerifyObjsInRegionClosure: public ObjectClosure { + G1CollectedHeap* _g1h; + size_t _live_bytes; + HeapRegion *_hr; +public: + VerifyObjsInRegionClosure(HeapRegion *hr) : _live_bytes(0), _hr(hr) { + _g1h = G1CollectedHeap::heap(); + } + void do_object(oop o) { + VerifyLivenessOopClosure isLive(_g1h); + assert(o != NULL, "Huh?"); + if (!_g1h->is_obj_dead(o)) { + o->oop_iterate(&isLive); + if (!_hr->obj_allocated_since_prev_marking(o)) + _live_bytes += (o->size() * HeapWordSize); + } + } + size_t live_bytes() { return _live_bytes; } +}; + +class PrintObjsInRegionClosure : public ObjectClosure { + HeapRegion *_hr; + G1CollectedHeap *_g1; +public: + PrintObjsInRegionClosure(HeapRegion *hr) : _hr(hr) { + _g1 = G1CollectedHeap::heap(); + }; + + void do_object(oop o) { + if (o != NULL) { + HeapWord *start = (HeapWord *) o; + size_t word_sz = o->size(); + gclog_or_tty->print("\nPrinting obj "PTR_FORMAT" of size " SIZE_FORMAT + " isMarkedPrev %d isMarkedNext %d isAllocSince %d\n", + (void*) o, word_sz, + _g1->isMarkedPrev(o), + _g1->isMarkedNext(o), + _hr->obj_allocated_since_prev_marking(o)); + HeapWord *end = start + word_sz; + HeapWord *cur; + int *val; + for (cur = start; cur < end; cur++) { + val = (int *) cur; + gclog_or_tty->print("\t "PTR_FORMAT":"PTR_FORMAT"\n", val, *val); + } + } + } +}; + +class VerifyRegionClosure: public HeapRegionClosure { +public: + bool _allow_dirty; + VerifyRegionClosure(bool allow_dirty) + : _allow_dirty(allow_dirty) {} + bool doHeapRegion(HeapRegion* r) { + guarantee(r->claim_value() == 0, "Should be unclaimed at verify points."); + if (r->isHumongous()) { + if (r->startsHumongous()) { + // Verify the single H object. + oop(r->bottom())->verify(); + size_t word_sz = oop(r->bottom())->size(); + guarantee(r->top() == r->bottom() + word_sz, + "Only one object in a humongous region"); + } + } else { + VerifyObjsInRegionClosure not_dead_yet_cl(r); + r->verify(_allow_dirty); + r->object_iterate(¬_dead_yet_cl); + guarantee(r->max_live_bytes() >= not_dead_yet_cl.live_bytes(), + "More live objects than counted in last complete marking."); + } + return false; + } +}; + +class VerifyRootsClosure: public OopsInGenClosure { +private: + G1CollectedHeap* _g1h; + bool _failures; + +public: + VerifyRootsClosure() : + _g1h(G1CollectedHeap::heap()), _failures(false) { } + + bool failures() { return _failures; } + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + oop obj = *p; + if (obj != NULL) { + if (_g1h->is_obj_dead(obj)) { + gclog_or_tty->print_cr("Root location "PTR_FORMAT" " + "points to dead obj "PTR_FORMAT, p, (void*) obj); + obj->print_on(gclog_or_tty); + _failures = true; + } + } + } +}; + +void G1CollectedHeap::verify(bool allow_dirty, bool silent) { + if (SafepointSynchronize::is_at_safepoint() || ! 
UseTLAB) { + if (!silent) { gclog_or_tty->print("roots "); } + VerifyRootsClosure rootsCl; + process_strong_roots(false, + SharedHeap::SO_AllClasses, + &rootsCl, + &rootsCl); + rem_set()->invalidate(perm_gen()->used_region(), false); + if (!silent) { gclog_or_tty->print("heapRegions "); } + VerifyRegionClosure blk(allow_dirty); + _hrs->iterate(&blk); + if (!silent) gclog_or_tty->print("remset "); + rem_set()->verify(); + guarantee(!rootsCl.failures(), "should not have had failures"); + } else { + if (!silent) gclog_or_tty->print("(SKIPPING roots, heapRegions, remset) "); + } +} + +class PrintRegionClosure: public HeapRegionClosure { + outputStream* _st; +public: + PrintRegionClosure(outputStream* st) : _st(st) {} + bool doHeapRegion(HeapRegion* r) { + r->print_on(_st); + return false; + } +}; + +void G1CollectedHeap::print() const { print_on(gclog_or_tty); } + +void G1CollectedHeap::print_on(outputStream* st) const { + PrintRegionClosure blk(st); + _hrs->iterate(&blk); +} + +void G1CollectedHeap::print_gc_threads_on(outputStream* st) const { + if (ParallelGCThreads > 0) { + workers()->print_worker_threads(); + } + st->print("\"G1 concurrent mark GC Thread\" "); + _cmThread->print(); + st->cr(); + st->print("\"G1 concurrent refinement GC Thread\" "); + _cg1r->cg1rThread()->print_on(st); + st->cr(); + st->print("\"G1 zero-fill GC Thread\" "); + _czft->print_on(st); + st->cr(); +} + +void G1CollectedHeap::gc_threads_do(ThreadClosure* tc) const { + if (ParallelGCThreads > 0) { + workers()->threads_do(tc); + } + tc->do_thread(_cmThread); + tc->do_thread(_cg1r->cg1rThread()); + tc->do_thread(_czft); +} + +void G1CollectedHeap::print_tracing_info() const { + concurrent_g1_refine()->print_final_card_counts(); + + // We'll overload this to mean "trace GC pause statistics." + if (TraceGen0Time || TraceGen1Time) { + // The "G1CollectorPolicy" is keeping track of these stats, so delegate + // to that. + g1_policy()->print_tracing_info(); + } + if (SummarizeG1RSStats) { + g1_rem_set()->print_summary_info(); + } + if (SummarizeG1ConcMark) { + concurrent_mark()->print_summary_info(); + } + if (SummarizeG1ZFStats) { + ConcurrentZFThread::print_summary_info(); + } + if (G1SummarizePopularity) { + print_popularity_summary_info(); + } + g1_policy()->print_yg_surv_rate_info(); + + GCOverheadReporter::printGCOverhead(); + + SpecializationStats::print(); +} + + +int G1CollectedHeap::addr_to_arena_id(void* addr) const { + HeapRegion* hr = heap_region_containing(addr); + if (hr == NULL) { + return 0; + } else { + return 1; + } +} + +G1CollectedHeap* G1CollectedHeap::heap() { + assert(_sh->kind() == CollectedHeap::G1CollectedHeap, + "not a garbage-first heap"); + return _g1h; +} + +void G1CollectedHeap::gc_prologue(bool full /* Ignored */) { + if (PrintHeapAtGC){ + gclog_or_tty->print_cr(" {Heap before GC collections=%d:", total_collections()); + Universe::print(); + } + assert(InlineCacheBuffer::is_empty(), "should have cleaned up ICBuffer"); + // Call allocation profiler + AllocationProfiler::iterate_since_last_gc(); + // Fill TLAB's and such + ensure_parsability(true); +} + +void G1CollectedHeap::gc_epilogue(bool full /* Ignored */) { + // FIXME: what is this about? + // I'm ignoring the "fill_newgen()" call if "alloc_event_enabled" + // is set. 
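+  // The derived pointer table is expected to have been processed (and left
+  // empty) by the time the epilogue runs; the assert below checks that.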
+ COMPILER2_PRESENT(assert(DerivedPointerTable::is_empty(), + "derived pointer present")); + + if (PrintHeapAtGC){ + gclog_or_tty->print_cr(" Heap after GC collections=%d:", total_collections()); + Universe::print(); + gclog_or_tty->print("} "); + } +} + +void G1CollectedHeap::do_collection_pause() { + // Read the GC count while holding the Heap_lock + // we need to do this _before_ wait_for_cleanup_complete(), to + // ensure that we do not give up the heap lock and potentially + // pick up the wrong count + int gc_count_before = SharedHeap::heap()->total_collections(); + + // Don't want to do a GC pause while cleanup is being completed! + wait_for_cleanup_complete(); + + g1_policy()->record_stop_world_start(); + { + MutexUnlocker mu(Heap_lock); // give up heap lock, execute gets it back + VM_G1IncCollectionPause op(gc_count_before); + VMThread::execute(&op); + } +} + +void +G1CollectedHeap::doConcurrentMark() { + if (G1ConcMark) { + MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); + if (!_cmThread->in_progress()) { + _cmThread->set_started(); + CGC_lock->notify(); + } + } +} + +class VerifyMarkedObjsClosure: public ObjectClosure { + G1CollectedHeap* _g1h; + public: + VerifyMarkedObjsClosure(G1CollectedHeap* g1h) : _g1h(g1h) {} + void do_object(oop obj) { + assert(obj->mark()->is_marked() ? !_g1h->is_obj_dead(obj) : true, + "markandsweep mark should agree with concurrent deadness"); + } +}; + +void +G1CollectedHeap::checkConcurrentMark() { + VerifyMarkedObjsClosure verifycl(this); + doConcurrentMark(); + // MutexLockerEx x(getMarkBitMapLock(), + // Mutex::_no_safepoint_check_flag); + object_iterate(&verifycl); +} + +void G1CollectedHeap::do_sync_mark() { + _cm->checkpointRootsInitial(); + _cm->markFromRoots(); + _cm->checkpointRootsFinal(false); +} + +// + +double G1CollectedHeap::predict_region_elapsed_time_ms(HeapRegion *hr, + bool young) { + return _g1_policy->predict_region_elapsed_time_ms(hr, young); +} + +void G1CollectedHeap::check_if_region_is_too_expensive(double + predicted_time_ms) { + _g1_policy->check_if_region_is_too_expensive(predicted_time_ms); +} + +size_t G1CollectedHeap::pending_card_num() { + size_t extra_cards = 0; + JavaThread *curr = Threads::first(); + while (curr != NULL) { + DirtyCardQueue& dcq = curr->dirty_card_queue(); + extra_cards += dcq.size(); + curr = curr->next(); + } + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + size_t buffer_size = dcqs.buffer_size(); + size_t buffer_num = dcqs.completed_buffers_num(); + return buffer_size * buffer_num + extra_cards; +} + +size_t G1CollectedHeap::max_pending_card_num() { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + size_t buffer_size = dcqs.buffer_size(); + size_t buffer_num = dcqs.completed_buffers_num(); + int thread_num = Threads::number_of_threads(); + return (buffer_num + thread_num) * buffer_size; +} + +size_t G1CollectedHeap::cards_scanned() { + HRInto_G1RemSet* g1_rset = (HRInto_G1RemSet*) g1_rem_set(); + return g1_rset->cardsScanned(); +} + +void +G1CollectedHeap::setup_surviving_young_words() { + guarantee( _surviving_young_words == NULL, "pre-condition" ); + size_t array_length = g1_policy()->young_cset_length(); + _surviving_young_words = NEW_C_HEAP_ARRAY(size_t, array_length); + if (_surviving_young_words == NULL) { + vm_exit_out_of_memory(sizeof(size_t) * array_length, + "Not enough space for young surv words summary."); + } + memset(_surviving_young_words, 0, array_length * sizeof(size_t)); + for (size_t i = 0; i < array_length; ++i) { + guarantee( 
_surviving_young_words[i] == 0, "invariant" ); + } +} + +void +G1CollectedHeap::update_surviving_young_words(size_t* surv_young_words) { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + size_t array_length = g1_policy()->young_cset_length(); + for (size_t i = 0; i < array_length; ++i) + _surviving_young_words[i] += surv_young_words[i]; +} + +void +G1CollectedHeap::cleanup_surviving_young_words() { + guarantee( _surviving_young_words != NULL, "pre-condition" ); + FREE_C_HEAP_ARRAY(size_t, _surviving_young_words); + _surviving_young_words = NULL; +} + +// + +void +G1CollectedHeap::do_collection_pause_at_safepoint(HeapRegion* popular_region) { + char verbose_str[128]; + sprintf(verbose_str, "GC pause "); + if (popular_region != NULL) + strcat(verbose_str, "(popular)"); + else if (g1_policy()->in_young_gc_mode()) { + if (g1_policy()->full_young_gcs()) + strcat(verbose_str, "(young)"); + else + strcat(verbose_str, "(partial)"); + } + bool reset_should_initiate_conc_mark = false; + if (popular_region != NULL && g1_policy()->should_initiate_conc_mark()) { + // we currently do not allow an initial mark phase to be piggy-backed + // on a popular pause + reset_should_initiate_conc_mark = true; + g1_policy()->unset_should_initiate_conc_mark(); + } + if (g1_policy()->should_initiate_conc_mark()) + strcat(verbose_str, " (initial-mark)"); + + GCCauseSetter x(this, (popular_region == NULL ? + GCCause::_g1_inc_collection_pause : + GCCause::_g1_pop_region_collection_pause)); + + // if PrintGCDetails is on, we'll print long statistics information + // in the collector policy code, so let's not print this as the output + // is messy if we do. + gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps); + TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); + TraceTime t(verbose_str, PrintGC && !PrintGCDetails, true, gclog_or_tty); + + ResourceMark rm; + assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint"); + assert(Thread::current() == VMThread::vm_thread(), "should be in vm thread"); + guarantee(!is_gc_active(), "collection is not reentrant"); + assert(regions_accounted_for(), "Region leakage!"); + ++_gc_time_stamp; + + if (g1_policy()->in_young_gc_mode()) { + assert(check_young_list_well_formed(), + "young list should be well formed"); + } + + if (GC_locker::is_active()) { + return; // GC is disabled (e.g. JNI GetXXXCritical operation) + } + + bool abandoned = false; + { // Call to jvmpi::post_class_unload_events must occur outside of active GC + IsGCActiveMark x; + + gc_prologue(false); + increment_total_collections(); + +#if G1_REM_SET_LOGGING + gclog_or_tty->print_cr("\nJust chose CS, heap:"); + print(); +#endif + + if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) { + HandleMark hm; // Discard invalid handles created during verification + prepare_for_verify(); + gclog_or_tty->print(" VerifyBeforeGC:"); + Universe::verify(false); + } + + COMPILER2_PRESENT(DerivedPointerTable::clear()); + + // We want to turn off ref discovere, if necessary, and turn it back on + // on again later if we do. + bool was_enabled = ref_processor()->discovery_enabled(); + if (was_enabled) ref_processor()->disable_discovery(); + + // Forget the current alloc region (we might even choose it to be part + // of the collection set!). + abandon_cur_alloc_region(); + + // The elapsed time induced by the start time below deliberately elides + // the possible verification above. 
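+    // The start time below is fed both to the GC overhead reporter and to
+    // the policy's pause-time bookkeeping.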
+    double start_time_sec = os::elapsedTime();
+    GCOverheadReporter::recordSTWStart(start_time_sec);
+    size_t start_used_bytes = used();
+    if (!G1ConcMark) {
+      do_sync_mark();
+    }
+
+    g1_policy()->record_collection_pause_start(start_time_sec,
+                                               start_used_bytes);
+
+#if SCAN_ONLY_VERBOSE
+    _young_list->print();
+#endif // SCAN_ONLY_VERBOSE
+
+    if (g1_policy()->should_initiate_conc_mark()) {
+      concurrent_mark()->checkpointRootsInitialPre();
+    }
+    save_marks();
+
+    // We must do this before any possible evacuation that should propagate
+    // marks, including evacuation of popular objects in a popular pause.
+    if (mark_in_progress()) {
+      double start_time_sec = os::elapsedTime();
+
+      _cm->drainAllSATBBuffers();
+      double finish_mark_ms = (os::elapsedTime() - start_time_sec) * 1000.0;
+      g1_policy()->record_satb_drain_time(finish_mark_ms);
+
+    }
+    // Record the number of elements currently on the mark stack, so we
+    // only iterate over these. (Since evacuation may add to the mark
+    // stack, doing more exposes race conditions.) If no mark is in
+    // progress, this will be zero.
+    _cm->set_oops_do_bound();
+
+    assert(regions_accounted_for(), "Region leakage.");
+
+    bool abandoned = false;
+
+    if (mark_in_progress())
+      concurrent_mark()->newCSet();
+
+    // Now choose the CS.
+    if (popular_region == NULL) {
+      g1_policy()->choose_collection_set();
+    } else {
+      // We may be evacuating a single region (for popularity).
+      g1_policy()->record_popular_pause_preamble_start();
+      popularity_pause_preamble(popular_region);
+      g1_policy()->record_popular_pause_preamble_end();
+      abandoned = (g1_policy()->collection_set() == NULL);
+      // Now we allow more regions to be added (we have to collect
+      // all popular regions).
+      if (!abandoned) {
+        g1_policy()->choose_collection_set(popular_region);
+      }
+    }
+    // We may abandon a pause if we find no region that will fit in the MMU
+    // pause.
+    abandoned = (g1_policy()->collection_set() == NULL);
+
+    // Nothing to do if we were unable to choose a collection set.
+    if (!abandoned) {
+#if G1_REM_SET_LOGGING
+      gclog_or_tty->print_cr("\nAfter pause, heap:");
+      print();
+#endif
+
+      setup_surviving_young_words();
+
+      // Set up the gc allocation regions.
+      get_gc_alloc_regions();
+
+      // Actually do the work...
+      evacuate_collection_set();
+      free_collection_set(g1_policy()->collection_set());
+      g1_policy()->clear_collection_set();
+
+      if (popular_region != NULL) {
+        // We have to wait until now, because we don't want the region to
+        // be rescheduled for pop-evac during RS update.
+        popular_region->set_popular_pending(false);
+      }
+
+      release_gc_alloc_regions();
+
+      cleanup_surviving_young_words();
+
+      if (g1_policy()->in_young_gc_mode()) {
+        _young_list->reset_sampled_info();
+        assert(check_young_list_empty(true),
+               "young list should be empty");
+
+#if SCAN_ONLY_VERBOSE
+        _young_list->print();
+#endif // SCAN_ONLY_VERBOSE
+
+        _young_list->reset_auxilary_lists();
+      }
+    } else {
+      COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
+    }
+
+    if (evacuation_failed()) {
+      _summary_bytes_used = recalculate_used();
+    } else {
+      // The "used" of the collection set has already been subtracted
+      // when the regions were freed. Add in the bytes evacuated.
+      _summary_bytes_used += g1_policy()->bytes_in_to_space();
+    }
+
+    if (g1_policy()->in_young_gc_mode() &&
+        g1_policy()->should_initiate_conc_mark()) {
+      concurrent_mark()->checkpointRootsInitialPost();
+      set_marking_started();
+      doConcurrentMark();
+    }
+
+#if SCAN_ONLY_VERBOSE
+    _young_list->print();
+#endif // SCAN_ONLY_VERBOSE
+
+    double end_time_sec = os::elapsedTime();
+    g1_policy()->record_pause_time((end_time_sec - start_time_sec)*1000.0);
+    GCOverheadReporter::recordSTWEnd(end_time_sec);
+    g1_policy()->record_collection_pause_end(popular_region != NULL,
+                                             abandoned);
+
+    assert(regions_accounted_for(), "Region leakage.");
+
+    if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
+      HandleMark hm;  // Discard invalid handles created during verification
+      gclog_or_tty->print(" VerifyAfterGC:");
+      Universe::verify(false);
+    }
+
+    if (was_enabled) ref_processor()->enable_discovery();
+
+    {
+      size_t expand_bytes = g1_policy()->expansion_amount();
+      if (expand_bytes > 0) {
+        size_t bytes_before = capacity();
+        expand(expand_bytes);
+      }
+    }
+
+    if (mark_in_progress())
+      concurrent_mark()->update_g1_committed();
+
+    gc_epilogue(false);
+  }
+
+  assert(verify_region_lists(), "Bad region lists.");
+
+  if (reset_should_initiate_conc_mark)
+    g1_policy()->set_should_initiate_conc_mark();
+
+  if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) {
+    gclog_or_tty->print_cr("Stopping after GC #%d", ExitAfterGCNum);
+    print_tracing_info();
+    vm_exit(-1);
+  }
+}
+
+void G1CollectedHeap::set_gc_alloc_region(int purpose, HeapRegion* r) {
+  assert(purpose >= 0 && purpose < GCAllocPurposeCount, "invalid purpose");
+  HeapWord* original_top = NULL;
+  if (r != NULL)
+    original_top = r->top();
+
+  // We will want to record the used space in r as being there before gc.
+  // Once we install it as a GC alloc region it's eligible for allocation.
+  // So record it now and use it later.
+  size_t r_used = 0;
+  if (r != NULL) {
+    r_used = r->used();
+
+    if (ParallelGCThreads > 0) {
+      // need to take the lock to guard against two threads calling
+      // get_gc_alloc_region concurrently (very unlikely but...)
+      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+      r->save_marks();
+    }
+  }
+  HeapRegion* old_alloc_region = _gc_alloc_regions[purpose];
+  _gc_alloc_regions[purpose] = r;
+  if (old_alloc_region != NULL) {
+    // Replace aliases too.
+    for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+      if (_gc_alloc_regions[ap] == old_alloc_region) {
+        _gc_alloc_regions[ap] = r;
+      }
+    }
+  }
+  if (r != NULL) {
+    push_gc_alloc_region(r);
+    if (mark_in_progress() && original_top != r->next_top_at_mark_start()) {
+      // We are using a region as a GC alloc region after it has been used
+      // as a mutator allocation region during the current marking cycle.
+      // The mutator-allocated objects are currently implicitly marked, but
+      // when we move hr->next_top_at_mark_start() forward at the end
+      // of the GC pause, they won't be. We therefore mark all objects in
+      // the "gap". We do this object-by-object, since marking densely
+      // does not currently work right with marking bitmap iteration. This
+      // means we rely on TLAB filling at the start of pauses, and no
+      // "resuscitation" of filled TLAB's. If we want to do this, we need
+      // to fix the marking bitmap iteration.
+      HeapWord* curhw = r->next_top_at_mark_start();
+      HeapWord* t = original_top;
+
+      while (curhw < t) {
+        oop cur = (oop)curhw;
+        // We'll assume parallel for generality. This is rare code.
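+        // Mark each object in the gap individually, advancing by the
+        // object's size.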
+ concurrent_mark()->markAndGrayObjectIfNecessary(cur); // can't we just mark them? + curhw = curhw + cur->size(); + } + assert(curhw == t, "Should have parsed correctly."); + } + if (G1PolicyVerbose > 1) { + gclog_or_tty->print("New alloc region ["PTR_FORMAT", "PTR_FORMAT", " PTR_FORMAT") " + "for survivors:", r->bottom(), original_top, r->end()); + r->print(); + } + g1_policy()->record_before_bytes(r_used); + } +} + +void G1CollectedHeap::push_gc_alloc_region(HeapRegion* hr) { + assert(Thread::current()->is_VM_thread() || + par_alloc_during_gc_lock()->owned_by_self(), "Precondition"); + assert(!hr->is_gc_alloc_region() && !hr->in_collection_set(), + "Precondition."); + hr->set_is_gc_alloc_region(true); + hr->set_next_gc_alloc_region(_gc_alloc_region_list); + _gc_alloc_region_list = hr; +} + +#ifdef G1_DEBUG +class FindGCAllocRegion: public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion* r) { + if (r->is_gc_alloc_region()) { + gclog_or_tty->print_cr("Region %d ["PTR_FORMAT"...] is still a gc_alloc_region.", + r->hrs_index(), r->bottom()); + } + return false; + } +}; +#endif // G1_DEBUG + +void G1CollectedHeap::forget_alloc_region_list() { + assert(Thread::current()->is_VM_thread(), "Precondition"); + while (_gc_alloc_region_list != NULL) { + HeapRegion* r = _gc_alloc_region_list; + assert(r->is_gc_alloc_region(), "Invariant."); + _gc_alloc_region_list = r->next_gc_alloc_region(); + r->set_next_gc_alloc_region(NULL); + r->set_is_gc_alloc_region(false); + if (r->is_empty()) { + ++_free_regions; + } + } +#ifdef G1_DEBUG + FindGCAllocRegion fa; + heap_region_iterate(&fa); +#endif // G1_DEBUG +} + + +bool G1CollectedHeap::check_gc_alloc_regions() { + // TODO: allocation regions check + return true; +} + +void G1CollectedHeap::get_gc_alloc_regions() { + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + // Create new GC alloc regions. + HeapRegion* alloc_region = _gc_alloc_regions[ap]; + // Clear this alloc region, so that in case it turns out to be + // unacceptable, we end up with no allocation region, rather than a bad + // one. + _gc_alloc_regions[ap] = NULL; + if (alloc_region == NULL || alloc_region->in_collection_set()) { + // Can't re-use old one. Allocate a new one. + alloc_region = newAllocRegionWithExpansion(ap, 0); + } + if (alloc_region != NULL) { + set_gc_alloc_region(ap, alloc_region); + } + } + // Set alternative regions for allocation purposes that have reached + // thier limit. + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + GCAllocPurpose alt_purpose = g1_policy()->alternative_purpose(ap); + if (_gc_alloc_regions[ap] == NULL && alt_purpose != ap) { + _gc_alloc_regions[ap] = _gc_alloc_regions[alt_purpose]; + } + } + assert(check_gc_alloc_regions(), "alloc regions messed up"); +} + +void G1CollectedHeap::release_gc_alloc_regions() { + // We keep a separate list of all regions that have been alloc regions in + // the current collection pause. Forget that now. + forget_alloc_region_list(); + + // The current alloc regions contain objs that have survived + // collection. Make them no longer GC alloc regions. 
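+  // Retire the GC alloc region for each purpose; a region that ended up
+  // completely empty is returned to the free list.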
+ for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + HeapRegion* r = _gc_alloc_regions[ap]; + if (r != NULL && r->is_empty()) { + { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + r->set_zero_fill_complete(); + put_free_region_on_list_locked(r); + } + } + // set_gc_alloc_region will also NULLify all aliases to the region + set_gc_alloc_region(ap, NULL); + _gc_alloc_region_counts[ap] = 0; + } +} + +void G1CollectedHeap::init_for_evac_failure(OopsInHeapRegionClosure* cl) { + _drain_in_progress = false; + set_evac_failure_closure(cl); + _evac_failure_scan_stack = new (ResourceObj::C_HEAP) GrowableArray(40, true); +} + +void G1CollectedHeap::finalize_for_evac_failure() { + assert(_evac_failure_scan_stack != NULL && + _evac_failure_scan_stack->length() == 0, + "Postcondition"); + assert(!_drain_in_progress, "Postcondition"); + // Don't have to delete, since the scan stack is a resource object. + _evac_failure_scan_stack = NULL; +} + + + +// *** Sequential G1 Evacuation + +HeapWord* G1CollectedHeap::allocate_during_gc(GCAllocPurpose purpose, size_t word_size) { + HeapRegion* alloc_region = _gc_alloc_regions[purpose]; + // let the caller handle alloc failure + if (alloc_region == NULL) return NULL; + assert(isHumongous(word_size) || !alloc_region->isHumongous(), + "Either the object is humongous or the region isn't"); + HeapWord* block = alloc_region->allocate(word_size); + if (block == NULL) { + block = allocate_during_gc_slow(purpose, alloc_region, false, word_size); + } + return block; +} + +class G1IsAliveClosure: public BoolObjectClosure { + G1CollectedHeap* _g1; +public: + G1IsAliveClosure(G1CollectedHeap* g1) : _g1(g1) {} + void do_object(oop p) { assert(false, "Do not call."); } + bool do_object_b(oop p) { + // It is reachable if it is outside the collection set, or is inside + // and forwarded. 
+ +#ifdef G1_DEBUG + gclog_or_tty->print_cr("is alive "PTR_FORMAT" in CS %d forwarded %d overall %d", + (void*) p, _g1->obj_in_cs(p), p->is_forwarded(), + !_g1->obj_in_cs(p) || p->is_forwarded()); +#endif // G1_DEBUG + + return !_g1->obj_in_cs(p) || p->is_forwarded(); + } +}; + +class G1KeepAliveClosure: public OopClosure { + G1CollectedHeap* _g1; +public: + G1KeepAliveClosure(G1CollectedHeap* g1) : _g1(g1) {} + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + void do_oop(oop* p) { + oop obj = *p; +#ifdef G1_DEBUG + if (PrintGC && Verbose) { + gclog_or_tty->print_cr("keep alive *"PTR_FORMAT" = "PTR_FORMAT" "PTR_FORMAT, + p, (void*) obj, (void*) *p); + } +#endif // G1_DEBUG + + if (_g1->obj_in_cs(obj)) { + assert( obj->is_forwarded(), "invariant" ); + *p = obj->forwardee(); + +#ifdef G1_DEBUG + gclog_or_tty->print_cr(" in CSet: moved "PTR_FORMAT" -> "PTR_FORMAT, + (void*) obj, (void*) *p); +#endif // G1_DEBUG + } + } +}; + +class RecreateRSetEntriesClosure: public OopClosure { +private: + G1CollectedHeap* _g1; + G1RemSet* _g1_rem_set; + HeapRegion* _from; +public: + RecreateRSetEntriesClosure(G1CollectedHeap* g1, HeapRegion* from) : + _g1(g1), _g1_rem_set(g1->g1_rem_set()), _from(from) + {} + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + void do_oop(oop* p) { + assert(_from->is_in_reserved(p), "paranoia"); + if (*p != NULL) { + _g1_rem_set->write_ref(_from, p); + } + } +}; + +class RemoveSelfPointerClosure: public ObjectClosure { +private: + G1CollectedHeap* _g1; + ConcurrentMark* _cm; + HeapRegion* _hr; + HeapWord* _last_self_forwarded_end; + size_t _prev_marked_bytes; + size_t _next_marked_bytes; +public: + RemoveSelfPointerClosure(G1CollectedHeap* g1, HeapRegion* hr) : + _g1(g1), _cm(_g1->concurrent_mark()), _hr(hr), + _last_self_forwarded_end(_hr->bottom()), + _prev_marked_bytes(0), _next_marked_bytes(0) + {} + + size_t prev_marked_bytes() { return _prev_marked_bytes; } + size_t next_marked_bytes() { return _next_marked_bytes; } + + void fill_remainder() { + HeapWord* limit = _hr->top(); + MemRegion mr(_last_self_forwarded_end, limit); + if (!mr.is_empty()) { + SharedHeap::fill_region_with_object(mr); + _cm->clearRangeBothMaps(mr); + _hr->declare_filled_region_to_BOT(mr); + } + } + + void do_object(oop obj) { + if (obj->is_forwarded()) { + if (obj->forwardee() == obj) { + assert(!_g1->is_obj_dead(obj), "We should not be preserving dead objs."); + _cm->markPrev(obj); + assert(_cm->isPrevMarked(obj), "Should be marked!"); + _prev_marked_bytes += (obj->size() * HeapWordSize); + if (_g1->mark_in_progress() && !_g1->is_obj_ill(obj)) { + _cm->markAndGrayObjectIfNecessary(obj); + } + HeapWord* obj_start = (HeapWord*)obj; + if (obj_start > _last_self_forwarded_end) { + MemRegion mr(_last_self_forwarded_end, obj_start); + SharedHeap::fill_region_with_object(mr); + assert(_cm->isPrevMarked(obj), "Should be marked!"); + _cm->clearRangeBothMaps(mr); + assert(_cm->isPrevMarked(obj), "Should be marked!"); + _hr->declare_filled_region_to_BOT(mr); + } + _last_self_forwarded_end = obj_start + obj->size(); + obj->set_mark(markOopDesc::prototype()); + + // While we were processing RSet buffers during the + // collection, we actually didn't scan any cards on the + // collection set, since we didn't want to update remebered + // sets with entries that point into the collection set, given + // that live objects fromthe collection set are about to move + // and such entries will be stale very soon. 
This change also + // dealt with a reliability issue which involved scanning a + // card in the collection set and coming across an array that + // was being chunked and looking malformed. The problem is + // that, if evacuation fails, we might have remembered set + // entries missing given that we skipped cards on the + // collection set. So, we'll recreate such entries now. + RecreateRSetEntriesClosure cl(_g1, _hr); + obj->oop_iterate(&cl); + + assert(_cm->isPrevMarked(obj), "Should be marked!"); + } + } + } +}; + +void G1CollectedHeap::remove_self_forwarding_pointers() { + HeapRegion* cur = g1_policy()->collection_set(); + + while (cur != NULL) { + assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!"); + + if (cur->evacuation_failed()) { + RemoveSelfPointerClosure rspc(_g1h, cur); + assert(cur->in_collection_set(), "bad CS"); + cur->object_iterate(&rspc); + rspc.fill_remainder(); + + // A number of manipulations to make the TAMS be the current top, + // and the marked bytes be the ones observed in the iteration. + if (_g1h->concurrent_mark()->at_least_one_mark_complete()) { + // The comments below are the postconditions achieved by the + // calls. Note especially the last such condition, which says that + // the count of marked bytes has been properly restored. + cur->note_start_of_marking(false); + // _next_top_at_mark_start == top, _next_marked_bytes == 0 + cur->add_to_marked_bytes(rspc.prev_marked_bytes()); + // _next_marked_bytes == prev_marked_bytes. + cur->note_end_of_marking(); + // _prev_top_at_mark_start == top(), + // _prev_marked_bytes == prev_marked_bytes + } + // If there is no mark in progress, we modified the _next variables + // above needlessly, but harmlessly. + if (_g1h->mark_in_progress()) { + cur->note_start_of_marking(false); + // _next_top_at_mark_start == top, _next_marked_bytes == 0 + // _next_marked_bytes == next_marked_bytes. + } + + // Now make sure the region has the right index in the sorted array. + g1_policy()->note_change_in_marked_bytes(cur); + } + cur = cur->next_in_collection_set(); + } + assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!"); + + // Now restore saved marks, if any. + if (_objs_with_preserved_marks != NULL) { + assert(_preserved_marks_of_objs != NULL, "Both or none."); + assert(_objs_with_preserved_marks->length() == + _preserved_marks_of_objs->length(), "Both or none."); + guarantee(_objs_with_preserved_marks->length() == + _preserved_marks_of_objs->length(), "Both or none."); + for (int i = 0; i < _objs_with_preserved_marks->length(); i++) { + oop obj = _objs_with_preserved_marks->at(i); + markOop m = _preserved_marks_of_objs->at(i); + obj->set_mark(m); + } + // Delete the preserved marks growable arrays (allocated on the C heap). 
+ delete _objs_with_preserved_marks; + delete _preserved_marks_of_objs; + _objs_with_preserved_marks = NULL; + _preserved_marks_of_objs = NULL; + } +} + +void G1CollectedHeap::push_on_evac_failure_scan_stack(oop obj) { + _evac_failure_scan_stack->push(obj); +} + +void G1CollectedHeap::drain_evac_failure_scan_stack() { + assert(_evac_failure_scan_stack != NULL, "precondition"); + + while (_evac_failure_scan_stack->length() > 0) { + oop obj = _evac_failure_scan_stack->pop(); + _evac_failure_closure->set_region(heap_region_containing(obj)); + obj->oop_iterate_backwards(_evac_failure_closure); + } +} + +void G1CollectedHeap::handle_evacuation_failure(oop old) { + markOop m = old->mark(); + // forward to self + assert(!old->is_forwarded(), "precondition"); + + old->forward_to(old); + handle_evacuation_failure_common(old, m); +} + +oop +G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, + oop old) { + markOop m = old->mark(); + oop forward_ptr = old->forward_to_atomic(old); + if (forward_ptr == NULL) { + // Forward-to-self succeeded. + if (_evac_failure_closure != cl) { + MutexLockerEx x(EvacFailureStack_lock, Mutex::_no_safepoint_check_flag); + assert(!_drain_in_progress, + "Should only be true while someone holds the lock."); + // Set the global evac-failure closure to the current thread's. + assert(_evac_failure_closure == NULL, "Or locking has failed."); + set_evac_failure_closure(cl); + // Now do the common part. + handle_evacuation_failure_common(old, m); + // Reset to NULL. + set_evac_failure_closure(NULL); + } else { + // The lock is already held, and this is recursive. + assert(_drain_in_progress, "This should only be the recursive case."); + handle_evacuation_failure_common(old, m); + } + return old; + } else { + // Someone else had a place to copy it. 
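+    // Another thread installed a forwarding pointer first; return the winner.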
+ return forward_ptr; + } +} + +void G1CollectedHeap::handle_evacuation_failure_common(oop old, markOop m) { + set_evacuation_failed(true); + + preserve_mark_if_necessary(old, m); + + HeapRegion* r = heap_region_containing(old); + if (!r->evacuation_failed()) { + r->set_evacuation_failed(true); + if (G1TraceRegions) { + gclog_or_tty->print("evacuation failed in heap region "PTR_FORMAT" " + "["PTR_FORMAT","PTR_FORMAT")\n", + r, r->bottom(), r->end()); + } + } + + push_on_evac_failure_scan_stack(old); + + if (!_drain_in_progress) { + // prevent recursion in copy_to_survivor_space() + _drain_in_progress = true; + drain_evac_failure_scan_stack(); + _drain_in_progress = false; + } +} + +void G1CollectedHeap::preserve_mark_if_necessary(oop obj, markOop m) { + if (m != markOopDesc::prototype()) { + if (_objs_with_preserved_marks == NULL) { + assert(_preserved_marks_of_objs == NULL, "Both or none."); + _objs_with_preserved_marks = + new (ResourceObj::C_HEAP) GrowableArray(40, true); + _preserved_marks_of_objs = + new (ResourceObj::C_HEAP) GrowableArray(40, true); + } + _objs_with_preserved_marks->push(obj); + _preserved_marks_of_objs->push(m); + } +} + +// *** Parallel G1 Evacuation + +HeapWord* G1CollectedHeap::par_allocate_during_gc(GCAllocPurpose purpose, + size_t word_size) { + HeapRegion* alloc_region = _gc_alloc_regions[purpose]; + // let the caller handle alloc failure + if (alloc_region == NULL) return NULL; + + HeapWord* block = alloc_region->par_allocate(word_size); + if (block == NULL) { + MutexLockerEx x(par_alloc_during_gc_lock(), + Mutex::_no_safepoint_check_flag); + block = allocate_during_gc_slow(purpose, alloc_region, true, word_size); + } + return block; +} + +HeapWord* +G1CollectedHeap::allocate_during_gc_slow(GCAllocPurpose purpose, + HeapRegion* alloc_region, + bool par, + size_t word_size) { + HeapWord* block = NULL; + // In the parallel case, a previous thread to obtain the lock may have + // already assigned a new gc_alloc_region. + if (alloc_region != _gc_alloc_regions[purpose]) { + assert(par, "But should only happen in parallel case."); + alloc_region = _gc_alloc_regions[purpose]; + if (alloc_region == NULL) return NULL; + block = alloc_region->par_allocate(word_size); + if (block != NULL) return block; + // Otherwise, continue; this new region is empty, too. + } + assert(alloc_region != NULL, "We better have an allocation region"); + // Another thread might have obtained alloc_region for the given + // purpose, and might be attempting to allocate in it, and might + // succeed. Therefore, we can't do the "finalization" stuff on the + // region below until we're sure the last allocation has happened. + // We ensure this by allocating the remaining space with a garbage + // object. + if (par) par_allocate_remaining_space(alloc_region); + // Now we can do the post-GC stuff on the region. + alloc_region->note_end_of_copying(); + g1_policy()->record_after_bytes(alloc_region->used()); + + if (_gc_alloc_region_counts[purpose] >= g1_policy()->max_regions(purpose)) { + // Cannot allocate more regions for the given purpose. + GCAllocPurpose alt_purpose = g1_policy()->alternative_purpose(purpose); + // Is there an alternative? + if (purpose != alt_purpose) { + HeapRegion* alt_region = _gc_alloc_regions[alt_purpose]; + // Has not the alternative region been aliased? + if (alloc_region != alt_region) { + // Try to allocate in the alternative region. 
+ if (par) { + block = alt_region->par_allocate(word_size); + } else { + block = alt_region->allocate(word_size); + } + // Make an alias. + _gc_alloc_regions[purpose] = _gc_alloc_regions[alt_purpose]; + } + if (block != NULL) { + return block; + } + // Both the allocation region and the alternative one are full + // and aliased, replace them with a new allocation region. + purpose = alt_purpose; + } else { + set_gc_alloc_region(purpose, NULL); + return NULL; + } + } + + // Now allocate a new region for allocation. + alloc_region = newAllocRegionWithExpansion(purpose, word_size, false /*zero_filled*/); + + // let the caller handle alloc failure + if (alloc_region != NULL) { + + assert(check_gc_alloc_regions(), "alloc regions messed up"); + assert(alloc_region->saved_mark_at_top(), + "Mark should have been saved already."); + // We used to assert that the region was zero-filled here, but no + // longer. + + // This must be done last: once it's installed, other regions may + // allocate in it (without holding the lock.) + set_gc_alloc_region(purpose, alloc_region); + + if (par) { + block = alloc_region->par_allocate(word_size); + } else { + block = alloc_region->allocate(word_size); + } + // Caller handles alloc failure. + } else { + // This sets other apis using the same old alloc region to NULL, also. + set_gc_alloc_region(purpose, NULL); + } + return block; // May be NULL. +} + +void G1CollectedHeap::par_allocate_remaining_space(HeapRegion* r) { + HeapWord* block = NULL; + size_t free_words; + do { + free_words = r->free()/HeapWordSize; + // If there's too little space, no one can allocate, so we're done. + if (free_words < (size_t)oopDesc::header_size()) return; + // Otherwise, try to claim it. + block = r->par_allocate(free_words); + } while (block == NULL); + SharedHeap::fill_region_with_object(MemRegion(block, free_words)); +} + +#define use_local_bitmaps 1 +#define verify_local_bitmaps 0 + +#ifndef PRODUCT + +class GCLabBitMap; +class GCLabBitMapClosure: public BitMapClosure { +private: + ConcurrentMark* _cm; + GCLabBitMap* _bitmap; + +public: + GCLabBitMapClosure(ConcurrentMark* cm, + GCLabBitMap* bitmap) { + _cm = cm; + _bitmap = bitmap; + } + + virtual bool do_bit(size_t offset); +}; + +#endif // PRODUCT + +#define oop_buffer_length 256 + +class GCLabBitMap: public BitMap { +private: + ConcurrentMark* _cm; + + int _shifter; + size_t _bitmap_word_covers_words; + + // beginning of the heap + HeapWord* _heap_start; + + // this is the actual start of the GCLab + HeapWord* _real_start_word; + + // this is the actual end of the GCLab + HeapWord* _real_end_word; + + // this is the first word, possibly located before the actual start + // of the GCLab, that corresponds to the first bit of the bitmap + HeapWord* _start_word; + + // size of a GCLab in words + size_t _gclab_word_size; + + static int shifter() { + return MinObjAlignment - 1; + } + + // how many heap words does a single bitmap word corresponds to? + static size_t bitmap_word_covers_words() { + return BitsPerWord << shifter(); + } + + static size_t gclab_word_size() { + return ParallelGCG1AllocBufferSize / HeapWordSize; + } + + static size_t bitmap_size_in_bits() { + size_t bits_in_bitmap = gclab_word_size() >> shifter(); + // We are going to ensure that the beginning of a word in this + // bitmap also corresponds to the beginning of a word in the + // global marking bitmap. To handle the case where a GCLab + // starts from the middle of the bitmap, we need to add enough + // space (i.e. 
up to a bitmap word) to ensure that we have + // enough bits in the bitmap. + return bits_in_bitmap + BitsPerWord - 1; + } +public: + GCLabBitMap(HeapWord* heap_start) + : BitMap(bitmap_size_in_bits()), + _cm(G1CollectedHeap::heap()->concurrent_mark()), + _shifter(shifter()), + _bitmap_word_covers_words(bitmap_word_covers_words()), + _heap_start(heap_start), + _gclab_word_size(gclab_word_size()), + _real_start_word(NULL), + _real_end_word(NULL), + _start_word(NULL) + { + guarantee( size_in_words() >= bitmap_size_in_words(), + "just making sure"); + } + + inline unsigned heapWordToOffset(HeapWord* addr) { + unsigned offset = (unsigned) pointer_delta(addr, _start_word) >> _shifter; + assert(offset < size(), "offset should be within bounds"); + return offset; + } + + inline HeapWord* offsetToHeapWord(size_t offset) { + HeapWord* addr = _start_word + (offset << _shifter); + assert(_real_start_word <= addr && addr < _real_end_word, "invariant"); + return addr; + } + + bool fields_well_formed() { + bool ret1 = (_real_start_word == NULL) && + (_real_end_word == NULL) && + (_start_word == NULL); + if (ret1) + return true; + + bool ret2 = _real_start_word >= _start_word && + _start_word < _real_end_word && + (_real_start_word + _gclab_word_size) == _real_end_word && + (_start_word + _gclab_word_size + _bitmap_word_covers_words) + > _real_end_word; + return ret2; + } + + inline bool mark(HeapWord* addr) { + guarantee(use_local_bitmaps, "invariant"); + assert(fields_well_formed(), "invariant"); + + if (addr >= _real_start_word && addr < _real_end_word) { + assert(!isMarked(addr), "should not have already been marked"); + + // first mark it on the bitmap + at_put(heapWordToOffset(addr), true); + + return true; + } else { + return false; + } + } + + inline bool isMarked(HeapWord* addr) { + guarantee(use_local_bitmaps, "invariant"); + assert(fields_well_formed(), "invariant"); + + return at(heapWordToOffset(addr)); + } + + void set_buffer(HeapWord* start) { + guarantee(use_local_bitmaps, "invariant"); + clear(); + + assert(start != NULL, "invariant"); + _real_start_word = start; + _real_end_word = start + _gclab_word_size; + + size_t diff = + pointer_delta(start, _heap_start) % _bitmap_word_covers_words; + _start_word = start - diff; + + assert(fields_well_formed(), "invariant"); + } + +#ifndef PRODUCT + void verify() { + // verify that the marks have been propagated + GCLabBitMapClosure cl(_cm, this); + iterate(&cl); + } +#endif // PRODUCT + + void retire() { + guarantee(use_local_bitmaps, "invariant"); + assert(fields_well_formed(), "invariant"); + + if (_start_word != NULL) { + CMBitMap* mark_bitmap = _cm->nextMarkBitMap(); + + // this means that the bitmap was set up for the GCLab + assert(_real_start_word != NULL && _real_end_word != NULL, "invariant"); + + mark_bitmap->mostly_disjoint_range_union(this, + 0, // always start from the start of the bitmap + _start_word, + size_in_words()); + _cm->grayRegionIfNecessary(MemRegion(_real_start_word, _real_end_word)); + +#ifndef PRODUCT + if (use_local_bitmaps && verify_local_bitmaps) + verify(); +#endif // PRODUCT + } else { + assert(_real_start_word == NULL && _real_end_word == NULL, "invariant"); + } + } + + static size_t bitmap_size_in_words() { + return (bitmap_size_in_bits() + BitsPerWord - 1) / BitsPerWord; + } +}; + +#ifndef PRODUCT + +bool GCLabBitMapClosure::do_bit(size_t offset) { + HeapWord* addr = _bitmap->offsetToHeapWord(offset); + guarantee(_cm->isMarked(oop(addr)), "it should be!"); + return true; +} + +#endif // PRODUCT + +class 
G1ParGCAllocBuffer: public ParGCAllocBuffer { +private: + bool _retired; + bool _during_marking; + GCLabBitMap _bitmap; + +public: + G1ParGCAllocBuffer() : + ParGCAllocBuffer(ParallelGCG1AllocBufferSize / HeapWordSize), + _during_marking(G1CollectedHeap::heap()->mark_in_progress()), + _bitmap(G1CollectedHeap::heap()->reserved_region().start()), + _retired(false) + { } + + inline bool mark(HeapWord* addr) { + guarantee(use_local_bitmaps, "invariant"); + assert(_during_marking, "invariant"); + return _bitmap.mark(addr); + } + + inline void set_buf(HeapWord* buf) { + if (use_local_bitmaps && _during_marking) + _bitmap.set_buffer(buf); + ParGCAllocBuffer::set_buf(buf); + _retired = false; + } + + inline void retire(bool end_of_gc, bool retain) { + if (_retired) + return; + if (use_local_bitmaps && _during_marking) { + _bitmap.retire(); + } + ParGCAllocBuffer::retire(end_of_gc, retain); + _retired = true; + } +}; + + +class G1ParScanThreadState : public StackObj { +protected: + G1CollectedHeap* _g1h; + RefToScanQueue* _refs; + + typedef GrowableArray OverflowQueue; + OverflowQueue* _overflowed_refs; + + G1ParGCAllocBuffer _alloc_buffers[GCAllocPurposeCount]; + + size_t _alloc_buffer_waste; + size_t _undo_waste; + + OopsInHeapRegionClosure* _evac_failure_cl; + G1ParScanHeapEvacClosure* _evac_cl; + G1ParScanPartialArrayClosure* _partial_scan_cl; + + int _hash_seed; + int _queue_num; + + int _term_attempts; +#if G1_DETAILED_STATS + int _pushes, _pops, _steals, _steal_attempts; + int _overflow_pushes; +#endif + + double _start; + double _start_strong_roots; + double _strong_roots_time; + double _start_term; + double _term_time; + + // Map from young-age-index (0 == not young, 1 is youngest) to + // surviving words. base is what we get back from the malloc call + size_t* _surviving_young_words_base; + // this points into the array, as we use the first few entries for padding + size_t* _surviving_young_words; + +#define PADDING_ELEM_NUM (64 / sizeof(size_t)) + + void add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; } + + void add_to_undo_waste(size_t waste) { _undo_waste += waste; } + +public: + G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num) + : _g1h(g1h), + _refs(g1h->task_queue(queue_num)), + _hash_seed(17), _queue_num(queue_num), + _term_attempts(0), +#if G1_DETAILED_STATS + _pushes(0), _pops(0), _steals(0), + _steal_attempts(0), _overflow_pushes(0), +#endif + _strong_roots_time(0), _term_time(0), + _alloc_buffer_waste(0), _undo_waste(0) + { + // we allocate G1YoungSurvRateNumRegions plus one entries, since + // we "sacrifice" entry 0 to keep track of surviving bytes for + // non-young regions (where the age is -1) + // We also add a few elements at the beginning and at the end in + // an attempt to eliminate cache contention + size_t real_length = 1 + _g1h->g1_policy()->young_cset_length(); + size_t array_length = PADDING_ELEM_NUM + + real_length + + PADDING_ELEM_NUM; + _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length); + if (_surviving_young_words_base == NULL) + vm_exit_out_of_memory(array_length * sizeof(size_t), + "Not enough space for young surv histo."); + _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM; + memset(_surviving_young_words, 0, real_length * sizeof(size_t)); + + _overflowed_refs = new OverflowQueue(10); + + _start = os::elapsedTime(); + } + + ~G1ParScanThreadState() { + FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base); + } + + RefToScanQueue* refs() { return _refs; } + OverflowQueue* 
overflowed_refs() { return _overflowed_refs; } + + inline G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) { + return &_alloc_buffers[purpose]; + } + + size_t alloc_buffer_waste() { return _alloc_buffer_waste; } + size_t undo_waste() { return _undo_waste; } + + void push_on_queue(oop* ref) { + if (!refs()->push(ref)) { + overflowed_refs()->push(ref); + IF_G1_DETAILED_STATS(note_overflow_push()); + } else { + IF_G1_DETAILED_STATS(note_push()); + } + } + + void pop_from_queue(oop*& ref) { + if (!refs()->pop_local(ref)) { + ref = NULL; + } else { + IF_G1_DETAILED_STATS(note_pop()); + } + } + + void pop_from_overflow_queue(oop*& ref) { + ref = overflowed_refs()->pop(); + } + + int refs_to_scan() { return refs()->size(); } + int overflowed_refs_to_scan() { return overflowed_refs()->length(); } + + HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) { + + HeapWord* obj = NULL; + if (word_sz * 100 < + (size_t)(ParallelGCG1AllocBufferSize / HeapWordSize) * + ParallelGCBufferWastePct) { + G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose); + add_to_alloc_buffer_waste(alloc_buf->words_remaining()); + alloc_buf->retire(false, false); + + HeapWord* buf = + _g1h->par_allocate_during_gc(purpose, ParallelGCG1AllocBufferSize / HeapWordSize); + if (buf == NULL) return NULL; // Let caller handle allocation failure. + // Otherwise. + alloc_buf->set_buf(buf); + + obj = alloc_buf->allocate(word_sz); + assert(obj != NULL, "buffer was definitely big enough..."); + } + else { + obj = _g1h->par_allocate_during_gc(purpose, word_sz); + } + return obj; + } + + HeapWord* allocate(GCAllocPurpose purpose, size_t word_sz) { + HeapWord* obj = alloc_buffer(purpose)->allocate(word_sz); + if (obj != NULL) return obj; + return allocate_slow(purpose, word_sz); + } + + void undo_allocation(GCAllocPurpose purpose, HeapWord* obj, size_t word_sz) { + if (alloc_buffer(purpose)->contains(obj)) { + guarantee(alloc_buffer(purpose)->contains(obj + word_sz - 1), + "should contain whole object"); + alloc_buffer(purpose)->undo_allocation(obj, word_sz); + } + else { + SharedHeap::fill_region_with_object(MemRegion(obj, word_sz)); + add_to_undo_waste(word_sz); + } + } + + void set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_cl) { + _evac_failure_cl = evac_failure_cl; + } + OopsInHeapRegionClosure* evac_failure_closure() { + return _evac_failure_cl; + } + + void set_evac_closure(G1ParScanHeapEvacClosure* evac_cl) { + _evac_cl = evac_cl; + } + + void set_partial_scan_closure(G1ParScanPartialArrayClosure* partial_scan_cl) { + _partial_scan_cl = partial_scan_cl; + } + + int* hash_seed() { return &_hash_seed; } + int queue_num() { return _queue_num; } + + int term_attempts() { return _term_attempts; } + void note_term_attempt() { _term_attempts++; } + +#if G1_DETAILED_STATS + int pushes() { return _pushes; } + int pops() { return _pops; } + int steals() { return _steals; } + int steal_attempts() { return _steal_attempts; } + int overflow_pushes() { return _overflow_pushes; } + + void note_push() { _pushes++; } + void note_pop() { _pops++; } + void note_steal() { _steals++; } + void note_steal_attempt() { _steal_attempts++; } + void note_overflow_push() { _overflow_pushes++; } +#endif + + void start_strong_roots() { + _start_strong_roots = os::elapsedTime(); + } + void end_strong_roots() { + _strong_roots_time += (os::elapsedTime() - _start_strong_roots); + } + double strong_roots_time() { return _strong_roots_time; } + + void start_term_time() { + note_term_attempt(); + _start_term = os::elapsedTime(); + } + 
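+  // end_term_time() accumulates the elapsed time since the matching
+  // start_term_time() call into _term_time.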
void end_term_time() { + _term_time += (os::elapsedTime() - _start_term); + } + double term_time() { return _term_time; } + + double elapsed() { + return os::elapsedTime() - _start; + } + + size_t* surviving_young_words() { + // We add on to hide entry 0 which accumulates surviving words for + // age -1 regions (i.e. non-young ones) + return _surviving_young_words; + } + + void retire_alloc_buffers() { + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + size_t waste = _alloc_buffers[ap].words_remaining(); + add_to_alloc_buffer_waste(waste); + _alloc_buffers[ap].retire(true, false); + } + } + + void trim_queue() { + while (refs_to_scan() > 0 || overflowed_refs_to_scan() > 0) { + oop *ref_to_scan = NULL; + if (overflowed_refs_to_scan() == 0) { + pop_from_queue(ref_to_scan); + } else { + pop_from_overflow_queue(ref_to_scan); + } + if (ref_to_scan != NULL) { + if ((intptr_t)ref_to_scan & G1_PARTIAL_ARRAY_MASK) { + _partial_scan_cl->do_oop_nv(ref_to_scan); + } else { + // Note: we can use "raw" versions of "region_containing" because + // "obj_to_scan" is definitely in the heap, and is not in a + // humongous region. + HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan); + _evac_cl->set_region(r); + _evac_cl->do_oop_nv(ref_to_scan); + } + } + } + } +}; + + +G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : + _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()), + _par_scan_state(par_scan_state) { } + +// This closure is applied to the fields of the objects that have just been copied. +// Should probably be made inline and moved in g1OopClosures.inline.hpp. +void G1ParScanClosure::do_oop_nv(oop* p) { + oop obj = *p; + if (obj != NULL) { + if (_g1->obj_in_cs(obj)) { + if (obj->is_forwarded()) { + *p = obj->forwardee(); + } else { + _par_scan_state->push_on_queue(p); + return; + } + } + _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num()); + } +} + +void G1ParCopyHelper::mark_forwardee(oop* p) { + // This is called _after_ do_oop_work has been called, hence after + // the object has been relocated to its new location and *p points + // to its new location. + + oop thisOop = *p; + if (thisOop != NULL) { + assert((_g1->evacuation_failed()) || (!_g1->obj_in_cs(thisOop)), + "shouldn't still be in the CSet if evacuation didn't fail."); + HeapWord* addr = (HeapWord*)thisOop; + if (_g1->is_in_g1_reserved(addr)) + _cm->grayRoot(oop(addr)); + } +} + +oop G1ParCopyHelper::copy_to_survivor_space(oop old) { + size_t word_sz = old->size(); + HeapRegion* from_region = _g1->heap_region_containing_raw(old); + // +1 to make the -1 indexes valid... + int young_index = from_region->young_index_in_cset()+1; + assert( (from_region->is_young() && young_index > 0) || + (!from_region->is_young() && young_index == 0), "invariant" ); + G1CollectorPolicy* g1p = _g1->g1_policy(); + markOop m = old->mark(); + GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, m->age(), + word_sz); + HeapWord* obj_ptr = _par_scan_state->allocate(alloc_purpose, word_sz); + oop obj = oop(obj_ptr); + + if (obj_ptr == NULL) { + // This will either forward-to-self, or detect that someone else has + // installed a forwarding pointer. 
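+    // Allocation for the copy failed: hand the object to the parallel
+    // evacuation-failure path.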
+    OopsInHeapRegionClosure* cl = _par_scan_state->evac_failure_closure();
+    return _g1->handle_evacuation_failure_par(cl, old);
+  }
+
+  oop forward_ptr = old->forward_to_atomic(obj);
+  if (forward_ptr == NULL) {
+    Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);
+    obj->set_mark(m);
+    if (g1p->track_object_age(alloc_purpose)) {
+      obj->incr_age();
+    }
+    // preserve "next" mark bit
+    if (_g1->mark_in_progress() && !_g1->is_obj_ill(old)) {
+      if (!use_local_bitmaps ||
+          !_par_scan_state->alloc_buffer(alloc_purpose)->mark(obj_ptr)) {
+        // if we couldn't mark it on the local bitmap (this happens when
+        // the object was not allocated in the GCLab), we have to bite
+        // the bullet and do the standard parallel mark
+        _cm->markAndGrayObjectIfNecessary(obj);
+      }
+#if 1
+      if (_g1->isMarkedNext(old)) {
+        _cm->nextMarkBitMap()->parClear((HeapWord*)old);
+      }
+#endif
+    }
+
+    size_t* surv_young_words = _par_scan_state->surviving_young_words();
+    surv_young_words[young_index] += word_sz;
+
+    if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
+      arrayOop(old)->set_length(0);
+      _par_scan_state->push_on_queue((oop*) ((intptr_t)old | G1_PARTIAL_ARRAY_MASK));
+    } else {
+      _scanner->set_region(_g1->heap_region_containing(obj));
+      obj->oop_iterate_backwards(_scanner);
+    }
+  } else {
+    _par_scan_state->undo_allocation(alloc_purpose, obj_ptr, word_sz);
+    obj = forward_ptr;
+  }
+  return obj;
+}
+
+template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
+void G1ParCopyClosure<do_gen_barrier, barrier, do_mark_forwardee>::do_oop_work(oop* p) {
+  oop obj = *p;
+  assert(barrier != G1BarrierRS || obj != NULL,
+         "Precondition: G1BarrierRS implies obj is nonNull");
+
+  if (obj != NULL) {
+    if (_g1->obj_in_cs(obj)) {
+#if G1_REM_SET_LOGGING
+      gclog_or_tty->print_cr("Loc "PTR_FORMAT" contains pointer "PTR_FORMAT" into CS.",
+                             p, (void*) obj);
+#endif
+      if (obj->is_forwarded()) {
+        *p = obj->forwardee();
+      } else {
+        *p = copy_to_survivor_space(obj);
+      }
+      // When scanning the RS, we only care about objs in CS.
+      if (barrier == G1BarrierRS) {
+        _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
+      }
+    }
+    // When scanning moved objs, must look at all oops.
+    if (barrier == G1BarrierEvac) {
+      _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
+    }
+
+    if (do_gen_barrier) {
+      par_do_barrier(p);
+    }
+  }
+}
+
+template void G1ParCopyClosure::do_oop_work(oop* p);
+
+template <class T> void G1ParScanPartialArrayClosure::process_array_chunk(
+  oop obj, int start, int end) {
+  // process our set of indices (include header in first chunk)
+  assert(start < end, "invariant");
+  T* const base = (T*)objArrayOop(obj)->base();
+  T* const start_addr = base + start;
+  T* const end_addr = base + end;
+  MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr);
+  _scanner.set_region(_g1->heap_region_containing(obj));
+  obj->oop_iterate(&_scanner, mr);
+}
+
+void G1ParScanPartialArrayClosure::do_oop_nv(oop* p) {
+  assert(!UseCompressedOops, "Needs to be fixed to work with compressed oops");
+  oop old = oop((intptr_t)p & ~G1_PARTIAL_ARRAY_MASK);
+  assert(old->is_objArray(), "must be obj array");
+  assert(old->is_forwarded(), "must be forwarded");
+  assert(Universe::heap()->is_in_reserved(old), "must be in heap.");
+
+  objArrayOop obj = objArrayOop(old->forwardee());
+  assert((void*)old != (void*)old->forwardee(), "self forwarding here?");
+  // Process ParGCArrayScanChunk elements now
+  // and push the remainder back onto queue
+  int start = arrayOop(old)->length();
+  int end = obj->length();
+  int remainder = end - start;
+  assert(start <= end, "just checking");
+  if (remainder > 2 * ParGCArrayScanChunk) {
+    // Test above combines last partial chunk with a full chunk
+    end = start + ParGCArrayScanChunk;
+    arrayOop(old)->set_length(end);
+    // Push remainder.
+    _par_scan_state->push_on_queue((oop*) ((intptr_t) old | G1_PARTIAL_ARRAY_MASK));
+  } else {
+    // Restore length so that the heap remains parsable in
+    // case of evacuation failure.
+    arrayOop(old)->set_length(end);
+  }
+
+  // process our set of indices (include header in first chunk)
+  process_array_chunk<oop>(obj, start, end);
+  oop* start_addr = start == 0 ?
(oop*)obj : obj->obj_at_addr(start); + oop* end_addr = (oop*)(obj->base()) + end; // obj_at_addr(end) asserts end < length + MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr); + _scanner.set_region(_g1->heap_region_containing(obj)); + obj->oop_iterate(&_scanner, mr); +} + +int G1ScanAndBalanceClosure::_nq = 0; + +class G1ParEvacuateFollowersClosure : public VoidClosure { +protected: + G1CollectedHeap* _g1h; + G1ParScanThreadState* _par_scan_state; + RefToScanQueueSet* _queues; + ParallelTaskTerminator* _terminator; + + G1ParScanThreadState* par_scan_state() { return _par_scan_state; } + RefToScanQueueSet* queues() { return _queues; } + ParallelTaskTerminator* terminator() { return _terminator; } + +public: + G1ParEvacuateFollowersClosure(G1CollectedHeap* g1h, + G1ParScanThreadState* par_scan_state, + RefToScanQueueSet* queues, + ParallelTaskTerminator* terminator) + : _g1h(g1h), _par_scan_state(par_scan_state), + _queues(queues), _terminator(terminator) {} + + void do_void() { + G1ParScanThreadState* pss = par_scan_state(); + while (true) { + oop* ref_to_scan; + pss->trim_queue(); + IF_G1_DETAILED_STATS(pss->note_steal_attempt()); + if (queues()->steal(pss->queue_num(), + pss->hash_seed(), + ref_to_scan)) { + IF_G1_DETAILED_STATS(pss->note_steal()); + pss->push_on_queue(ref_to_scan); + continue; + } + pss->start_term_time(); + if (terminator()->offer_termination()) break; + pss->end_term_time(); + } + pss->end_term_time(); + pss->retire_alloc_buffers(); + } +}; + +class G1ParTask : public AbstractGangTask { +protected: + G1CollectedHeap* _g1h; + RefToScanQueueSet *_queues; + ParallelTaskTerminator _terminator; + + Mutex _stats_lock; + Mutex* stats_lock() { return &_stats_lock; } + + size_t getNCards() { + return (_g1h->capacity() + G1BlockOffsetSharedArray::N_bytes - 1) + / G1BlockOffsetSharedArray::N_bytes; + } + +public: + G1ParTask(G1CollectedHeap* g1h, int workers, RefToScanQueueSet *task_queues) + : AbstractGangTask("G1 collection"), + _g1h(g1h), + _queues(task_queues), + _terminator(workers, _queues), + _stats_lock(Mutex::leaf, "parallel G1 stats lock", true) + {} + + RefToScanQueueSet* queues() { return _queues; } + + RefToScanQueue *work_queue(int i) { + return queues()->queue(i); + } + + void work(int i) { + ResourceMark rm; + HandleMark hm; + + G1ParScanThreadState pss(_g1h, i); + G1ParScanHeapEvacClosure scan_evac_cl(_g1h, &pss); + G1ParScanHeapEvacClosure evac_failure_cl(_g1h, &pss); + G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss); + + pss.set_evac_closure(&scan_evac_cl); + pss.set_evac_failure_closure(&evac_failure_cl); + pss.set_partial_scan_closure(&partial_scan_cl); + + G1ParScanExtRootClosure only_scan_root_cl(_g1h, &pss); + G1ParScanPermClosure only_scan_perm_cl(_g1h, &pss); + G1ParScanHeapRSClosure only_scan_heap_rs_cl(_g1h, &pss); + G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss); + G1ParScanAndMarkPermClosure scan_mark_perm_cl(_g1h, &pss); + G1ParScanAndMarkHeapRSClosure scan_mark_heap_rs_cl(_g1h, &pss); + + OopsInHeapRegionClosure *scan_root_cl; + OopsInHeapRegionClosure *scan_perm_cl; + OopsInHeapRegionClosure *scan_so_cl; + + if (_g1h->g1_policy()->should_initiate_conc_mark()) { + scan_root_cl = &scan_mark_root_cl; + scan_perm_cl = &scan_mark_perm_cl; + scan_so_cl = &scan_mark_heap_rs_cl; + } else { + scan_root_cl = &only_scan_root_cl; + scan_perm_cl = &only_scan_perm_cl; + scan_so_cl = &only_scan_heap_rs_cl; + } + + pss.start_strong_roots(); + _g1h->g1_process_strong_roots(/* not collecting perm */ false, + SharedHeap::SO_AllClasses, 
+ scan_root_cl, + &only_scan_heap_rs_cl, + scan_so_cl, + scan_perm_cl, + i); + pss.end_strong_roots(); + { + double start = os::elapsedTime(); + G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator); + evac.do_void(); + double elapsed_ms = (os::elapsedTime()-start)*1000.0; + double term_ms = pss.term_time()*1000.0; + _g1h->g1_policy()->record_obj_copy_time(i, elapsed_ms-term_ms); + _g1h->g1_policy()->record_termination_time(i, term_ms); + } + _g1h->update_surviving_young_words(pss.surviving_young_words()+1); + + // Clean up any par-expanded rem sets. + HeapRegionRemSet::par_cleanup(); + + MutexLocker x(stats_lock()); + if (ParallelGCVerbose) { + gclog_or_tty->print("Thread %d complete:\n", i); +#if G1_DETAILED_STATS + gclog_or_tty->print(" Pushes: %7d Pops: %7d Overflows: %7d Steals %7d (in %d attempts)\n", + pss.pushes(), + pss.pops(), + pss.overflow_pushes(), + pss.steals(), + pss.steal_attempts()); +#endif + double elapsed = pss.elapsed(); + double strong_roots = pss.strong_roots_time(); + double term = pss.term_time(); + gclog_or_tty->print(" Elapsed: %7.2f ms.\n" + " Strong roots: %7.2f ms (%6.2f%%)\n" + " Termination: %7.2f ms (%6.2f%%) (in %d entries)\n", + elapsed * 1000.0, + strong_roots * 1000.0, (strong_roots*100.0/elapsed), + term * 1000.0, (term*100.0/elapsed), + pss.term_attempts()); + size_t total_waste = pss.alloc_buffer_waste() + pss.undo_waste(); + gclog_or_tty->print(" Waste: %8dK\n" + " Alloc Buffer: %8dK\n" + " Undo: %8dK\n", + (total_waste * HeapWordSize) / K, + (pss.alloc_buffer_waste() * HeapWordSize) / K, + (pss.undo_waste() * HeapWordSize) / K); + } + + assert(pss.refs_to_scan() == 0, "Task queue should be empty"); + assert(pss.overflowed_refs_to_scan() == 0, "Overflow queue should be empty"); + } +}; + +// *** Common G1 Evacuation Stuff + +class G1CountClosure: public OopsInHeapRegionClosure { +public: + int n; + G1CountClosure() : n(0) {} + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + void do_oop(oop* p) { + oop obj = *p; + assert(obj != NULL && G1CollectedHeap::heap()->obj_in_cs(obj), + "Rem set closure called on non-rem-set pointer."); + n++; + } +}; + +static G1CountClosure count_closure; + +void +G1CollectedHeap:: +g1_process_strong_roots(bool collecting_perm_gen, + SharedHeap::ScanningOption so, + OopClosure* scan_non_heap_roots, + OopsInHeapRegionClosure* scan_rs, + OopsInHeapRegionClosure* scan_so, + OopsInGenClosure* scan_perm, + int worker_i) { + // First scan the strong roots, including the perm gen. + double ext_roots_start = os::elapsedTime(); + double closure_app_time_sec = 0.0; + + BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots); + BufferingOopsInGenClosure buf_scan_perm(scan_perm); + buf_scan_perm.set_generation(perm_gen()); + + process_strong_roots(collecting_perm_gen, so, + &buf_scan_non_heap_roots, + &buf_scan_perm); + // Finish up any enqueued closure apps. + buf_scan_non_heap_roots.done(); + buf_scan_perm.done(); + double ext_roots_end = os::elapsedTime(); + g1_policy()->reset_obj_copy_time(worker_i); + double obj_copy_time_sec = + buf_scan_non_heap_roots.closure_app_seconds() + + buf_scan_perm.closure_app_seconds(); + g1_policy()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0); + double ext_root_time_ms = + ((ext_roots_end - ext_roots_start) - obj_copy_time_sec) * 1000.0; + g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms); + + // Scan strong roots in mark stack. 
+ if (!_process_strong_tasks->is_task_claimed(G1H_PS_mark_stack_oops_do)) { + concurrent_mark()->oops_do(scan_non_heap_roots); + } + double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0; + g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms); + + // XXX What should this be doing in the parallel case? + g1_policy()->record_collection_pause_end_CH_strong_roots(); + if (G1VerifyRemSet) { + // :::: FIXME :::: + // The stupid remembered set doesn't know how to filter out dead + // objects, which the smart one does, and so when it is created + // and then compared the number of entries in each differs and + // the verification code fails. + guarantee(false, "verification code is broken, see note"); + + // Let's make sure that the current rem set agrees with the stupidest + // one possible! + bool refs_enabled = ref_processor()->discovery_enabled(); + if (refs_enabled) ref_processor()->disable_discovery(); + StupidG1RemSet stupid(this); + count_closure.n = 0; + stupid.oops_into_collection_set_do(&count_closure, worker_i); + int stupid_n = count_closure.n; + count_closure.n = 0; + g1_rem_set()->oops_into_collection_set_do(&count_closure, worker_i); + guarantee(count_closure.n == stupid_n, "Old and new rem sets differ."); + gclog_or_tty->print_cr("\nFound %d pointers in heap RS.", count_closure.n); + if (refs_enabled) ref_processor()->enable_discovery(); + } + if (scan_so != NULL) { + scan_scan_only_set(scan_so, worker_i); + } + // Now scan the complement of the collection set. + if (scan_rs != NULL) { + g1_rem_set()->oops_into_collection_set_do(scan_rs, worker_i); + } + // Finish with the ref_processor roots. + if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) { + ref_processor()->oops_do(scan_non_heap_roots); + } + g1_policy()->record_collection_pause_end_G1_strong_roots(); + _process_strong_tasks->all_tasks_completed(); +} + +void +G1CollectedHeap::scan_scan_only_region(HeapRegion* r, + OopsInHeapRegionClosure* oc, + int worker_i) { + HeapWord* startAddr = r->bottom(); + HeapWord* endAddr = r->used_region().end(); + + oc->set_region(r); + + HeapWord* p = r->bottom(); + HeapWord* t = r->top(); + guarantee( p == r->next_top_at_mark_start(), "invariant" ); + while (p < t) { + oop obj = oop(p); + p += obj->oop_iterate(oc); + } +} + +void +G1CollectedHeap::scan_scan_only_set(OopsInHeapRegionClosure* oc, + int worker_i) { + double start = os::elapsedTime(); + + BufferingOopsInHeapRegionClosure boc(oc); + + FilterInHeapRegionAndIntoCSClosure scan_only(this, &boc); + FilterAndMarkInHeapRegionAndIntoCSClosure scan_and_mark(this, &boc, concurrent_mark()); + + OopsInHeapRegionClosure *foc; + if (g1_policy()->should_initiate_conc_mark()) + foc = &scan_and_mark; + else + foc = &scan_only; + + HeapRegion* hr; + int n = 0; + while ((hr = _young_list->par_get_next_scan_only_region()) != NULL) { + scan_scan_only_region(hr, foc, worker_i); + ++n; + } + boc.done(); + + double closure_app_s = boc.closure_app_seconds(); + g1_policy()->record_obj_copy_time(worker_i, closure_app_s * 1000.0); + double ms = (os::elapsedTime() - start - closure_app_s)*1000.0; + g1_policy()->record_scan_only_time(worker_i, ms, n); +} + +void +G1CollectedHeap::g1_process_weak_roots(OopClosure* root_closure, + OopClosure* non_root_closure) { + SharedHeap::process_weak_roots(root_closure, non_root_closure); +} + + +class SaveMarksClosure: public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion* r) { + r->save_marks(); + return false; + } +}; + +void 
G1CollectedHeap::save_marks() { + if (ParallelGCThreads == 0) { + SaveMarksClosure sm; + heap_region_iterate(&sm); + } + // We do this even in the parallel case + perm_gen()->save_marks(); +} + +void G1CollectedHeap::evacuate_collection_set() { + set_evacuation_failed(false); + + g1_rem_set()->prepare_for_oops_into_collection_set_do(); + concurrent_g1_refine()->set_use_cache(false); + int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); + + set_par_threads(n_workers); + G1ParTask g1_par_task(this, n_workers, _task_queues); + + init_for_evac_failure(NULL); + + change_strong_roots_parity(); // In preparation for parallel strong roots. + rem_set()->prepare_for_younger_refs_iterate(true); + double start_par = os::elapsedTime(); + + if (ParallelGCThreads > 0) { + // The individual threads will set their evac-failure closures. + workers()->run_task(&g1_par_task); + } else { + g1_par_task.work(0); + } + + double par_time = (os::elapsedTime() - start_par) * 1000.0; + g1_policy()->record_par_time(par_time); + set_par_threads(0); + // Is this the right thing to do here? We don't save marks + // on individual heap regions when we allocate from + // them in parallel, so this seems like the correct place for this. + all_alloc_regions_note_end_of_copying(); + { + G1IsAliveClosure is_alive(this); + G1KeepAliveClosure keep_alive(this); + JNIHandles::weak_oops_do(&is_alive, &keep_alive); + } + + g1_rem_set()->cleanup_after_oops_into_collection_set_do(); + concurrent_g1_refine()->set_use_cache(true); + + finalize_for_evac_failure(); + + // Must do this before removing self-forwarding pointers, which clears + // the per-region evac-failure flags. + concurrent_mark()->complete_marking_in_collection_set(); + + if (evacuation_failed()) { + remove_self_forwarding_pointers(); + + if (PrintGCDetails) { + gclog_or_tty->print(" (evacuation failed)"); + } else if (PrintGC) { + gclog_or_tty->print("--"); + } + } + + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); +} + +void G1CollectedHeap::free_region(HeapRegion* hr) { + size_t pre_used = 0; + size_t cleared_h_regions = 0; + size_t freed_regions = 0; + UncleanRegionList local_list; + + HeapWord* start = hr->bottom(); + HeapWord* end = hr->prev_top_at_mark_start(); + size_t used_bytes = hr->used(); + size_t live_bytes = hr->max_live_bytes(); + if (used_bytes > 0) { + guarantee( live_bytes <= used_bytes, "invariant" ); + } else { + guarantee( live_bytes == 0, "invariant" ); + } + + size_t garbage_bytes = used_bytes - live_bytes; + if (garbage_bytes > 0) + g1_policy()->decrease_known_garbage_bytes(garbage_bytes); + + free_region_work(hr, pre_used, cleared_h_regions, freed_regions, + &local_list); + finish_free_region_work(pre_used, cleared_h_regions, freed_regions, + &local_list); +} + +void +G1CollectedHeap::free_region_work(HeapRegion* hr, + size_t& pre_used, + size_t& cleared_h_regions, + size_t& freed_regions, + UncleanRegionList* list, + bool par) { + assert(!hr->popular(), "should not free popular regions"); + pre_used += hr->used(); + if (hr->isHumongous()) { + assert(hr->startsHumongous(), + "Only the start of a humongous region should be freed."); + int ind = _hrs->find(hr); + assert(ind != -1, "Should have an index."); + // Clear the start region. + hr->hr_clear(par, true /*clear_space*/); + list->insert_before_head(hr); + cleared_h_regions++; + freed_regions++; + // Clear any continued regions. 
+ ind++; + while ((size_t)ind < n_regions()) { + HeapRegion* hrc = _hrs->at(ind); + if (!hrc->continuesHumongous()) break; + // Otherwise, does continue the H region. + assert(hrc->humongous_start_region() == hr, "Huh?"); + hrc->hr_clear(par, true /*clear_space*/); + cleared_h_regions++; + freed_regions++; + list->insert_before_head(hrc); + ind++; + } + } else { + hr->hr_clear(par, true /*clear_space*/); + list->insert_before_head(hr); + freed_regions++; + // If we're using clear2, this should not be enabled. + // assert(!hr->in_cohort(), "Can't be both free and in a cohort."); + } +} + +void G1CollectedHeap::finish_free_region_work(size_t pre_used, + size_t cleared_h_regions, + size_t freed_regions, + UncleanRegionList* list) { + if (list != NULL && list->sz() > 0) { + prepend_region_list_on_unclean_list(list); + } + // Acquire a lock, if we're parallel, to update possibly-shared + // variables. + Mutex* lock = (n_par_threads() > 0) ? ParGCRareEvent_lock : NULL; + { + MutexLockerEx x(lock, Mutex::_no_safepoint_check_flag); + _summary_bytes_used -= pre_used; + _num_humongous_regions -= (int) cleared_h_regions; + _free_regions += freed_regions; + } +} + + +void G1CollectedHeap::dirtyCardsForYoungRegions(CardTableModRefBS* ct_bs, HeapRegion* list) { + while (list != NULL) { + guarantee( list->is_young(), "invariant" ); + + HeapWord* bottom = list->bottom(); + HeapWord* end = list->end(); + MemRegion mr(bottom, end); + ct_bs->dirty(mr); + + list = list->get_next_young_region(); + } +} + +void G1CollectedHeap::cleanUpCardTable() { + CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set()); + double start = os::elapsedTime(); + + ct_bs->clear(_g1_committed); + + // now, redirty the cards of the scan-only and survivor regions + // (it seemed faster to do it this way, instead of iterating over + // all regions and then clearing / dirtying as approprite) + dirtyCardsForYoungRegions(ct_bs, _young_list->first_scan_only_region()); + dirtyCardsForYoungRegions(ct_bs, _young_list->first_survivor_region()); + + double elapsed = os::elapsedTime() - start; + g1_policy()->record_clear_ct_time( elapsed * 1000.0); +} + + +void G1CollectedHeap::do_collection_pause_if_appropriate(size_t word_size) { + // First do any popular regions. + HeapRegion* hr; + while ((hr = popular_region_to_evac()) != NULL) { + evac_popular_region(hr); + } + // Now do heuristic pauses. 
+ if (g1_policy()->should_do_collection_pause(word_size)) { + do_collection_pause(); + } +} + +void G1CollectedHeap::free_collection_set(HeapRegion* cs_head) { + double young_time_ms = 0.0; + double non_young_time_ms = 0.0; + + G1CollectorPolicy* policy = g1_policy(); + + double start_sec = os::elapsedTime(); + bool non_young = true; + + HeapRegion* cur = cs_head; + int age_bound = -1; + size_t rs_lengths = 0; + + while (cur != NULL) { + if (non_young) { + if (cur->is_young()) { + double end_sec = os::elapsedTime(); + double elapsed_ms = (end_sec - start_sec) * 1000.0; + non_young_time_ms += elapsed_ms; + + start_sec = os::elapsedTime(); + non_young = false; + } + } else { + if (!cur->is_on_free_list()) { + double end_sec = os::elapsedTime(); + double elapsed_ms = (end_sec - start_sec) * 1000.0; + young_time_ms += elapsed_ms; + + start_sec = os::elapsedTime(); + non_young = true; + } + } + + rs_lengths += cur->rem_set()->occupied(); + + HeapRegion* next = cur->next_in_collection_set(); + assert(cur->in_collection_set(), "bad CS"); + cur->set_next_in_collection_set(NULL); + cur->set_in_collection_set(false); + + if (cur->is_young()) { + int index = cur->young_index_in_cset(); + guarantee( index != -1, "invariant" ); + guarantee( (size_t)index < policy->young_cset_length(), "invariant" ); + size_t words_survived = _surviving_young_words[index]; + cur->record_surv_words_in_group(words_survived); + } else { + int index = cur->young_index_in_cset(); + guarantee( index == -1, "invariant" ); + } + + assert( (cur->is_young() && cur->young_index_in_cset() > -1) || + (!cur->is_young() && cur->young_index_in_cset() == -1), + "invariant" ); + + if (!cur->evacuation_failed()) { + // And the region is empty. + assert(!cur->is_empty(), + "Should not have empty regions in a CS."); + free_region(cur); + } else { + guarantee( !cur->is_scan_only(), "should not be scan only" ); + cur->uninstall_surv_rate_group(); + if (cur->is_young()) + cur->set_young_index_in_cset(-1); + cur->set_not_young(); + cur->set_evacuation_failed(false); + } + cur = next; + } + + policy->record_max_rs_lengths(rs_lengths); + policy->cset_regions_freed(); + + double end_sec = os::elapsedTime(); + double elapsed_ms = (end_sec - start_sec) * 1000.0; + if (non_young) + non_young_time_ms += elapsed_ms; + else + young_time_ms += elapsed_ms; + + policy->record_young_free_cset_time_ms(young_time_ms); + policy->record_non_young_free_cset_time_ms(non_young_time_ms); +} + +HeapRegion* +G1CollectedHeap::alloc_region_from_unclean_list_locked(bool zero_filled) { + assert(ZF_mon->owned_by_self(), "Precondition"); + HeapRegion* res = pop_unclean_region_list_locked(); + if (res != NULL) { + assert(!res->continuesHumongous() && + res->zero_fill_state() != HeapRegion::Allocated, + "Only free regions on unclean list."); + if (zero_filled) { + res->ensure_zero_filled_locked(); + res->set_zero_fill_allocated(); + } + } + return res; +} + +HeapRegion* G1CollectedHeap::alloc_region_from_unclean_list(bool zero_filled) { + MutexLockerEx zx(ZF_mon, Mutex::_no_safepoint_check_flag); + return alloc_region_from_unclean_list_locked(zero_filled); +} + +void G1CollectedHeap::put_region_on_unclean_list(HeapRegion* r) { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + put_region_on_unclean_list_locked(r); + if (should_zf()) ZF_mon->notify_all(); // Wake up ZF thread. 
+} + +void G1CollectedHeap::set_unclean_regions_coming(bool b) { + MutexLockerEx x(Cleanup_mon); + set_unclean_regions_coming_locked(b); +} + +void G1CollectedHeap::set_unclean_regions_coming_locked(bool b) { + assert(Cleanup_mon->owned_by_self(), "Precondition"); + _unclean_regions_coming = b; + // Wake up mutator threads that might be waiting for completeCleanup to + // finish. + if (!b) Cleanup_mon->notify_all(); +} + +void G1CollectedHeap::wait_for_cleanup_complete() { + MutexLockerEx x(Cleanup_mon); + wait_for_cleanup_complete_locked(); +} + +void G1CollectedHeap::wait_for_cleanup_complete_locked() { + assert(Cleanup_mon->owned_by_self(), "precondition"); + while (_unclean_regions_coming) { + Cleanup_mon->wait(); + } +} + +void +G1CollectedHeap::put_region_on_unclean_list_locked(HeapRegion* r) { + assert(ZF_mon->owned_by_self(), "precondition."); + _unclean_region_list.insert_before_head(r); +} + +void +G1CollectedHeap::prepend_region_list_on_unclean_list(UncleanRegionList* list) { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + prepend_region_list_on_unclean_list_locked(list); + if (should_zf()) ZF_mon->notify_all(); // Wake up ZF thread. +} + +void +G1CollectedHeap:: +prepend_region_list_on_unclean_list_locked(UncleanRegionList* list) { + assert(ZF_mon->owned_by_self(), "precondition."); + _unclean_region_list.prepend_list(list); +} + +HeapRegion* G1CollectedHeap::pop_unclean_region_list_locked() { + assert(ZF_mon->owned_by_self(), "precondition."); + HeapRegion* res = _unclean_region_list.pop(); + if (res != NULL) { + // Inform ZF thread that there's a new unclean head. + if (_unclean_region_list.hd() != NULL && should_zf()) + ZF_mon->notify_all(); + } + return res; +} + +HeapRegion* G1CollectedHeap::peek_unclean_region_list_locked() { + assert(ZF_mon->owned_by_self(), "precondition."); + return _unclean_region_list.hd(); +} + + +bool G1CollectedHeap::move_cleaned_region_to_free_list_locked() { + assert(ZF_mon->owned_by_self(), "Precondition"); + HeapRegion* r = peek_unclean_region_list_locked(); + if (r != NULL && r->zero_fill_state() == HeapRegion::ZeroFilled) { + // Result of below must be equal to "r", since we hold the lock. 
+ (void)pop_unclean_region_list_locked(); + put_free_region_on_list_locked(r); + return true; + } else { + return false; + } +} + +bool G1CollectedHeap::move_cleaned_region_to_free_list() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + return move_cleaned_region_to_free_list_locked(); +} + + +void G1CollectedHeap::put_free_region_on_list_locked(HeapRegion* r) { + assert(ZF_mon->owned_by_self(), "precondition."); + assert(_free_region_list_size == free_region_list_length(), "Inv"); + assert(r->zero_fill_state() == HeapRegion::ZeroFilled, + "Regions on free list must be zero filled"); + assert(!r->isHumongous(), "Must not be humongous."); + assert(r->is_empty(), "Better be empty"); + assert(!r->is_on_free_list(), + "Better not already be on free list"); + assert(!r->is_on_unclean_list(), + "Better not already be on unclean list"); + r->set_on_free_list(true); + r->set_next_on_free_list(_free_region_list); + _free_region_list = r; + _free_region_list_size++; + assert(_free_region_list_size == free_region_list_length(), "Inv"); +} + +void G1CollectedHeap::put_free_region_on_list(HeapRegion* r) { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + put_free_region_on_list_locked(r); +} + +HeapRegion* G1CollectedHeap::pop_free_region_list_locked() { + assert(ZF_mon->owned_by_self(), "precondition."); + assert(_free_region_list_size == free_region_list_length(), "Inv"); + HeapRegion* res = _free_region_list; + if (res != NULL) { + _free_region_list = res->next_from_free_list(); + _free_region_list_size--; + res->set_on_free_list(false); + res->set_next_on_free_list(NULL); + assert(_free_region_list_size == free_region_list_length(), "Inv"); + } + return res; +} + + +HeapRegion* G1CollectedHeap::alloc_free_region_from_lists(bool zero_filled) { + // By self, or on behalf of self. + assert(Heap_lock->is_locked(), "Precondition"); + HeapRegion* res = NULL; + bool first = true; + while (res == NULL) { + if (zero_filled || !first) { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + res = pop_free_region_list_locked(); + if (res != NULL) { + assert(!res->zero_fill_is_allocated(), + "No allocated regions on free list."); + res->set_zero_fill_allocated(); + } else if (!first) { + break; // We tried both, time to return NULL. + } + } + + if (res == NULL) { + res = alloc_region_from_unclean_list(zero_filled); + } + assert(res == NULL || + !zero_filled || + res->zero_fill_is_allocated(), + "We must have allocated the region we're returning"); + first = false; + } + return res; +} + +void G1CollectedHeap::remove_allocated_regions_from_lists() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + { + HeapRegion* prev = NULL; + HeapRegion* cur = _unclean_region_list.hd(); + while (cur != NULL) { + HeapRegion* next = cur->next_from_unclean_list(); + if (cur->zero_fill_is_allocated()) { + // Remove from the list. + if (prev == NULL) { + (void)_unclean_region_list.pop(); + } else { + _unclean_region_list.delete_after(prev); + } + cur->set_on_unclean_list(false); + cur->set_next_on_unclean_list(NULL); + } else { + prev = cur; + } + cur = next; + } + assert(_unclean_region_list.sz() == unclean_region_list_length(), + "Inv"); + } + + { + HeapRegion* prev = NULL; + HeapRegion* cur = _free_region_list; + while (cur != NULL) { + HeapRegion* next = cur->next_from_free_list(); + if (cur->zero_fill_is_allocated()) { + // Remove from the list. 
+ if (prev == NULL) { + _free_region_list = cur->next_from_free_list(); + } else { + prev->set_next_on_free_list(cur->next_from_free_list()); + } + cur->set_on_free_list(false); + cur->set_next_on_free_list(NULL); + _free_region_list_size--; + } else { + prev = cur; + } + cur = next; + } + assert(_free_region_list_size == free_region_list_length(), "Inv"); + } +} + +bool G1CollectedHeap::verify_region_lists() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + return verify_region_lists_locked(); +} + +bool G1CollectedHeap::verify_region_lists_locked() { + HeapRegion* unclean = _unclean_region_list.hd(); + while (unclean != NULL) { + guarantee(unclean->is_on_unclean_list(), "Well, it is!"); + guarantee(!unclean->is_on_free_list(), "Well, it shouldn't be!"); + guarantee(unclean->zero_fill_state() != HeapRegion::Allocated, + "Everything else is possible."); + unclean = unclean->next_from_unclean_list(); + } + guarantee(_unclean_region_list.sz() == unclean_region_list_length(), "Inv"); + + HeapRegion* free_r = _free_region_list; + while (free_r != NULL) { + assert(free_r->is_on_free_list(), "Well, it is!"); + assert(!free_r->is_on_unclean_list(), "Well, it shouldn't be!"); + switch (free_r->zero_fill_state()) { + case HeapRegion::NotZeroFilled: + case HeapRegion::ZeroFilling: + guarantee(false, "Should not be on free list."); + break; + default: + // Everything else is possible. + break; + } + free_r = free_r->next_from_free_list(); + } + guarantee(_free_region_list_size == free_region_list_length(), "Inv"); + // If we didn't do an assertion... + return true; +} + +size_t G1CollectedHeap::free_region_list_length() { + assert(ZF_mon->owned_by_self(), "precondition."); + size_t len = 0; + HeapRegion* cur = _free_region_list; + while (cur != NULL) { + len++; + cur = cur->next_from_free_list(); + } + return len; +} + +size_t G1CollectedHeap::unclean_region_list_length() { + assert(ZF_mon->owned_by_self(), "precondition."); + return _unclean_region_list.length(); +} + +size_t G1CollectedHeap::n_regions() { + return _hrs->length(); +} + +size_t G1CollectedHeap::max_regions() { + return + (size_t)align_size_up(g1_reserved_obj_bytes(), HeapRegion::GrainBytes) / + HeapRegion::GrainBytes; +} + +size_t G1CollectedHeap::free_regions() { + /* Possibly-expensive assert. 
+ assert(_free_regions == count_free_regions(), + "_free_regions is off."); + */ + return _free_regions; +} + +bool G1CollectedHeap::should_zf() { + return _free_region_list_size < (size_t) G1ConcZFMaxRegions; +} + +class RegionCounter: public HeapRegionClosure { + size_t _n; +public: + RegionCounter() : _n(0) {} + bool doHeapRegion(HeapRegion* r) { + if (r->is_empty() && !r->popular()) { + assert(!r->isHumongous(), "H regions should not be empty."); + _n++; + } + return false; + } + int res() { return (int) _n; } +}; + +size_t G1CollectedHeap::count_free_regions() { + RegionCounter rc; + heap_region_iterate(&rc); + size_t n = rc.res(); + if (_cur_alloc_region != NULL && _cur_alloc_region->is_empty()) + n--; + return n; +} + +size_t G1CollectedHeap::count_free_regions_list() { + size_t n = 0; + size_t o = 0; + ZF_mon->lock_without_safepoint_check(); + HeapRegion* cur = _free_region_list; + while (cur != NULL) { + cur = cur->next_from_free_list(); + n++; + } + size_t m = unclean_region_list_length(); + ZF_mon->unlock(); + return n + m; +} + +bool G1CollectedHeap::should_set_young_locked() { + assert(heap_lock_held_for_gc(), + "the heap lock should already be held by or for this thread"); + return (g1_policy()->in_young_gc_mode() && + g1_policy()->should_add_next_region_to_young_list()); +} + +void G1CollectedHeap::set_region_short_lived_locked(HeapRegion* hr) { + assert(heap_lock_held_for_gc(), + "the heap lock should already be held by or for this thread"); + _young_list->push_region(hr); + g1_policy()->set_region_short_lived(hr); +} + +class NoYoungRegionsClosure: public HeapRegionClosure { +private: + bool _success; +public: + NoYoungRegionsClosure() : _success(true) { } + bool doHeapRegion(HeapRegion* r) { + if (r->is_young()) { + gclog_or_tty->print_cr("Region ["PTR_FORMAT", "PTR_FORMAT") tagged as young", + r->bottom(), r->end()); + _success = false; + } + return false; + } + bool success() { return _success; } +}; + +bool G1CollectedHeap::check_young_list_empty(bool ignore_scan_only_list, + bool check_sample) { + bool ret = true; + + ret = _young_list->check_list_empty(ignore_scan_only_list, check_sample); + if (!ignore_scan_only_list) { + NoYoungRegionsClosure closure; + heap_region_iterate(&closure); + ret = ret && closure.success(); + } + + return ret; +} + +void G1CollectedHeap::empty_young_list() { + assert(heap_lock_held_for_gc(), + "the heap lock should already be held by or for this thread"); + assert(g1_policy()->in_young_gc_mode(), "should be in young GC mode"); + + _young_list->empty_list(); +} + +bool G1CollectedHeap::all_alloc_regions_no_allocs_since_save_marks() { + bool no_allocs = true; + for (int ap = 0; ap < GCAllocPurposeCount && no_allocs; ++ap) { + HeapRegion* r = _gc_alloc_regions[ap]; + no_allocs = r == NULL || r->saved_mark_at_top(); + } + return no_allocs; +} + +void G1CollectedHeap::all_alloc_regions_note_end_of_copying() { + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + HeapRegion* r = _gc_alloc_regions[ap]; + if (r != NULL) { + // Check for aliases. + bool has_processed_alias = false; + for (int i = 0; i < ap; ++i) { + if (_gc_alloc_regions[i] == r) { + has_processed_alias = true; + break; + } + } + if (!has_processed_alias) { + r->note_end_of_copying(); + g1_policy()->record_after_bytes(r->used()); + } + } + } +} + + +// Done at the start of full GC. 
+void G1CollectedHeap::tear_down_region_lists() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + while (pop_unclean_region_list_locked() != NULL) ; + assert(_unclean_region_list.hd() == NULL && _unclean_region_list.sz() == 0, + "Postconditions of loop.") + while (pop_free_region_list_locked() != NULL) ; + assert(_free_region_list == NULL, "Postcondition of loop."); + if (_free_region_list_size != 0) { + gclog_or_tty->print_cr("Size is %d.", _free_region_list_size); + print(); + } + assert(_free_region_list_size == 0, "Postconditions of loop."); +} + + +class RegionResetter: public HeapRegionClosure { + G1CollectedHeap* _g1; + int _n; +public: + RegionResetter() : _g1(G1CollectedHeap::heap()), _n(0) {} + bool doHeapRegion(HeapRegion* r) { + if (r->continuesHumongous()) return false; + if (r->top() > r->bottom()) { + if (r->top() < r->end()) { + Copy::fill_to_words(r->top(), + pointer_delta(r->end(), r->top())); + } + r->set_zero_fill_allocated(); + } else { + assert(r->is_empty(), "tautology"); + if (r->popular()) { + if (r->zero_fill_state() != HeapRegion::Allocated) { + r->ensure_zero_filled_locked(); + r->set_zero_fill_allocated(); + } + } else { + _n++; + switch (r->zero_fill_state()) { + case HeapRegion::NotZeroFilled: + case HeapRegion::ZeroFilling: + _g1->put_region_on_unclean_list_locked(r); + break; + case HeapRegion::Allocated: + r->set_zero_fill_complete(); + // no break; go on to put on free list. + case HeapRegion::ZeroFilled: + _g1->put_free_region_on_list_locked(r); + break; + } + } + } + return false; + } + + int getFreeRegionCount() {return _n;} +}; + +// Done at the end of full GC. +void G1CollectedHeap::rebuild_region_lists() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + // This needs to go at the end of the full GC. + RegionResetter rs; + heap_region_iterate(&rs); + _free_regions = rs.getFreeRegionCount(); + // Tell the ZF thread it may have work to do. + if (should_zf()) ZF_mon->notify_all(); +} + +class UsedRegionsNeedZeroFillSetter: public HeapRegionClosure { + G1CollectedHeap* _g1; + int _n; +public: + UsedRegionsNeedZeroFillSetter() : _g1(G1CollectedHeap::heap()), _n(0) {} + bool doHeapRegion(HeapRegion* r) { + if (r->continuesHumongous()) return false; + if (r->top() > r->bottom()) { + // There are assertions in "set_zero_fill_needed()" below that + // require top() == bottom(), so this is technically illegal. + // We'll skirt the law here, by making that true temporarily. + DEBUG_ONLY(HeapWord* save_top = r->top(); + r->set_top(r->bottom())); + r->set_zero_fill_needed(); + DEBUG_ONLY(r->set_top(save_top)); + } + return false; + } +}; + +// Done at the start of full GC. +void G1CollectedHeap::set_used_regions_to_need_zero_fill() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + // This needs to go at the end of the full GC. 
+ UsedRegionsNeedZeroFillSetter rs; + heap_region_iterate(&rs); +} + +class CountObjClosure: public ObjectClosure { + size_t _n; +public: + CountObjClosure() : _n(0) {} + void do_object(oop obj) { _n++; } + size_t n() { return _n; } +}; + +size_t G1CollectedHeap::pop_object_used_objs() { + size_t sum_objs = 0; + for (int i = 0; i < G1NumPopularRegions; i++) { + CountObjClosure cl; + _hrs->at(i)->object_iterate(&cl); + sum_objs += cl.n(); + } + return sum_objs; +} + +size_t G1CollectedHeap::pop_object_used_bytes() { + size_t sum_bytes = 0; + for (int i = 0; i < G1NumPopularRegions; i++) { + sum_bytes += _hrs->at(i)->used(); + } + return sum_bytes; +} + + +static int nq = 0; + +HeapWord* G1CollectedHeap::allocate_popular_object(size_t word_size) { + while (_cur_pop_hr_index < G1NumPopularRegions) { + HeapRegion* cur_pop_region = _hrs->at(_cur_pop_hr_index); + HeapWord* res = cur_pop_region->allocate(word_size); + if (res != NULL) { + // We account for popular objs directly in the used summary: + _summary_bytes_used += (word_size * HeapWordSize); + return res; + } + // Otherwise, try the next region (first making sure that we remember + // the last "top" value as the "next_top_at_mark_start", so that + // objects made popular during markings aren't automatically considered + // live). + cur_pop_region->note_end_of_copying(); + // Otherwise, try the next region. + _cur_pop_hr_index++; + } + // XXX: For now !!! + vm_exit_out_of_memory(word_size, + "Not enough pop obj space (To Be Fixed)"); + return NULL; +} + +class HeapRegionList: public CHeapObj { + public: + HeapRegion* hr; + HeapRegionList* next; +}; + +void G1CollectedHeap::schedule_popular_region_evac(HeapRegion* r) { + // This might happen during parallel GC, so protect by this lock. + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + // We don't schedule regions whose evacuations are already pending, or + // are already being evacuated. + if (!r->popular_pending() && !r->in_collection_set()) { + r->set_popular_pending(true); + if (G1TracePopularity) { + gclog_or_tty->print_cr("Scheduling region "PTR_FORMAT" " + "["PTR_FORMAT", "PTR_FORMAT") for pop-object evacuation.", + r, r->bottom(), r->end()); + } + HeapRegionList* hrl = new HeapRegionList; + hrl->hr = r; + hrl->next = _popular_regions_to_be_evacuated; + _popular_regions_to_be_evacuated = hrl; + } +} + +HeapRegion* G1CollectedHeap::popular_region_to_evac() { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + HeapRegion* res = NULL; + while (_popular_regions_to_be_evacuated != NULL && res == NULL) { + HeapRegionList* hrl = _popular_regions_to_be_evacuated; + _popular_regions_to_be_evacuated = hrl->next; + res = hrl->hr; + // The G1RSPopLimit may have increased, so recheck here... + if (res->rem_set()->occupied() < (size_t) G1RSPopLimit) { + // Hah: don't need to schedule. + if (G1TracePopularity) { + gclog_or_tty->print_cr("Unscheduling region "PTR_FORMAT" " + "["PTR_FORMAT", "PTR_FORMAT") " + "for pop-object evacuation (size %d < limit %d)", + res, res->bottom(), res->end(), + res->rem_set()->occupied(), G1RSPopLimit); + } + res->set_popular_pending(false); + res = NULL; + } + // We do not reset res->popular() here; if we did so, it would allow + // the region to be "rescheduled" for popularity evacuation. Instead, + // this is done in the collection pause, with the world stopped. 
+ // So the invariant is that the regions in the list have the popularity + // boolean set, but having the boolean set does not imply membership + // on the list (though there can at most one such pop-pending region + // not on the list at any time). + delete hrl; + } + return res; +} + +void G1CollectedHeap::evac_popular_region(HeapRegion* hr) { + while (true) { + // Don't want to do a GC pause while cleanup is being completed! + wait_for_cleanup_complete(); + + // Read the GC count while holding the Heap_lock + int gc_count_before = SharedHeap::heap()->total_collections(); + g1_policy()->record_stop_world_start(); + + { + MutexUnlocker mu(Heap_lock); // give up heap lock, execute gets it back + VM_G1PopRegionCollectionPause op(gc_count_before, hr); + VMThread::execute(&op); + + // If the prolog succeeded, we didn't do a GC for this. + if (op.prologue_succeeded()) break; + } + // Otherwise we didn't. We should recheck the size, though, since + // the limit may have increased... + if (hr->rem_set()->occupied() < (size_t) G1RSPopLimit) { + hr->set_popular_pending(false); + break; + } + } +} + +void G1CollectedHeap::atomic_inc_obj_rc(oop obj) { + Atomic::inc(obj_rc_addr(obj)); +} + +class CountRCClosure: public OopsInHeapRegionClosure { + G1CollectedHeap* _g1h; + bool _parallel; +public: + CountRCClosure(G1CollectedHeap* g1h) : + _g1h(g1h), _parallel(ParallelGCThreads > 0) + {} + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + void do_oop(oop* p) { + oop obj = *p; + assert(obj != NULL, "Precondition."); + if (_parallel) { + // We go sticky at the limit to avoid excess contention. + // If we want to track the actual RC's further, we'll need to keep a + // per-thread hash table or something for the popular objects. + if (_g1h->obj_rc(obj) < G1ObjPopLimit) { + _g1h->atomic_inc_obj_rc(obj); + } + } else { + _g1h->inc_obj_rc(obj); + } + } +}; + +class EvacPopObjClosure: public ObjectClosure { + G1CollectedHeap* _g1h; + size_t _pop_objs; + size_t _max_rc; +public: + EvacPopObjClosure(G1CollectedHeap* g1h) : + _g1h(g1h), _pop_objs(0), _max_rc(0) {} + + void do_object(oop obj) { + size_t rc = _g1h->obj_rc(obj); + _max_rc = MAX2(rc, _max_rc); + if (rc >= (size_t) G1ObjPopLimit) { + _g1h->_pop_obj_rc_at_copy.add((double)rc); + size_t word_sz = obj->size(); + HeapWord* new_pop_loc = _g1h->allocate_popular_object(word_sz); + oop new_pop_obj = (oop)new_pop_loc; + Copy::aligned_disjoint_words((HeapWord*)obj, new_pop_loc, word_sz); + obj->forward_to(new_pop_obj); + G1ScanAndBalanceClosure scan_and_balance(_g1h); + new_pop_obj->oop_iterate_backwards(&scan_and_balance); + // preserve "next" mark bit if marking is in progress. 
+ if (_g1h->mark_in_progress() && !_g1h->is_obj_ill(obj)) { + _g1h->concurrent_mark()->markAndGrayObjectIfNecessary(new_pop_obj); + } + + if (G1TracePopularity) { + gclog_or_tty->print_cr("Found obj " PTR_FORMAT " of word size " SIZE_FORMAT + " pop (%d), move to " PTR_FORMAT, + (void*) obj, word_sz, + _g1h->obj_rc(obj), (void*) new_pop_obj); + } + _pop_objs++; + } + } + size_t pop_objs() { return _pop_objs; } + size_t max_rc() { return _max_rc; } +}; + +class G1ParCountRCTask : public AbstractGangTask { + G1CollectedHeap* _g1h; + BitMap _bm; + + size_t getNCards() { + return (_g1h->capacity() + G1BlockOffsetSharedArray::N_bytes - 1) + / G1BlockOffsetSharedArray::N_bytes; + } + CountRCClosure _count_rc_closure; +public: + G1ParCountRCTask(G1CollectedHeap* g1h) : + AbstractGangTask("G1 Par RC Count task"), + _g1h(g1h), _bm(getNCards()), _count_rc_closure(g1h) + {} + + void work(int i) { + ResourceMark rm; + HandleMark hm; + _g1h->g1_rem_set()->oops_into_collection_set_do(&_count_rc_closure, i); + } +}; + +void G1CollectedHeap::popularity_pause_preamble(HeapRegion* popular_region) { + // We're evacuating a single region (for popularity). + if (G1TracePopularity) { + gclog_or_tty->print_cr("Doing pop region pause for ["PTR_FORMAT", "PTR_FORMAT")", + popular_region->bottom(), popular_region->end()); + } + g1_policy()->set_single_region_collection_set(popular_region); + size_t max_rc; + if (!compute_reference_counts_and_evac_popular(popular_region, + &max_rc)) { + // We didn't evacuate any popular objects. + // We increase the RS popularity limit, to prevent this from + // happening in the future. + if (G1RSPopLimit < (1 << 30)) { + G1RSPopLimit *= 2; + } + // For now, interesting enough for a message: +#if 1 + gclog_or_tty->print_cr("In pop region pause for ["PTR_FORMAT", "PTR_FORMAT"), " + "failed to find a pop object (max = %d).", + popular_region->bottom(), popular_region->end(), + max_rc); + gclog_or_tty->print_cr("Increased G1RSPopLimit to %d.", G1RSPopLimit); +#endif // 0 + // Also, we reset the collection set to NULL, to make the rest of + // the collection do nothing. + assert(popular_region->next_in_collection_set() == NULL, + "should be single-region."); + popular_region->set_in_collection_set(false); + popular_region->set_popular_pending(false); + g1_policy()->clear_collection_set(); + } +} + +bool G1CollectedHeap:: +compute_reference_counts_and_evac_popular(HeapRegion* popular_region, + size_t* max_rc) { + HeapWord* rc_region_bot; + HeapWord* rc_region_end; + + // Set up the reference count region. + HeapRegion* rc_region = newAllocRegion(HeapRegion::GrainWords); + if (rc_region != NULL) { + rc_region_bot = rc_region->bottom(); + rc_region_end = rc_region->end(); + } else { + rc_region_bot = NEW_C_HEAP_ARRAY(HeapWord, HeapRegion::GrainWords); + if (rc_region_bot == NULL) { + vm_exit_out_of_memory(HeapRegion::GrainWords, + "No space for RC region."); + } + rc_region_end = rc_region_bot + HeapRegion::GrainWords; + } + + if (G1TracePopularity) + gclog_or_tty->print_cr("RC region is ["PTR_FORMAT", "PTR_FORMAT")", + rc_region_bot, rc_region_end); + if (rc_region_bot > popular_region->bottom()) { + _rc_region_above = true; + _rc_region_diff = + pointer_delta(rc_region_bot, popular_region->bottom(), 1); + } else { + assert(rc_region_bot < popular_region->bottom(), "Can't be equal."); + _rc_region_above = false; + _rc_region_diff = + pointer_delta(popular_region->bottom(), rc_region_bot, 1); + } + g1_policy()->record_pop_compute_rc_start(); + // Count external references. 
+ g1_rem_set()->prepare_for_oops_into_collection_set_do(); + if (ParallelGCThreads > 0) { + + set_par_threads(workers()->total_workers()); + G1ParCountRCTask par_count_rc_task(this); + workers()->run_task(&par_count_rc_task); + set_par_threads(0); + + } else { + CountRCClosure count_rc_closure(this); + g1_rem_set()->oops_into_collection_set_do(&count_rc_closure, 0); + } + g1_rem_set()->cleanup_after_oops_into_collection_set_do(); + g1_policy()->record_pop_compute_rc_end(); + + // Now evacuate popular objects. + g1_policy()->record_pop_evac_start(); + EvacPopObjClosure evac_pop_obj_cl(this); + popular_region->object_iterate(&evac_pop_obj_cl); + *max_rc = evac_pop_obj_cl.max_rc(); + + // Make sure the last "top" value of the current popular region is copied + // as the "next_top_at_mark_start", so that objects made popular during + // markings aren't automatically considered live. + HeapRegion* cur_pop_region = _hrs->at(_cur_pop_hr_index); + cur_pop_region->note_end_of_copying(); + + if (rc_region != NULL) { + free_region(rc_region); + } else { + FREE_C_HEAP_ARRAY(HeapWord, rc_region_bot); + } + g1_policy()->record_pop_evac_end(); + + return evac_pop_obj_cl.pop_objs() > 0; +} + +class CountPopObjInfoClosure: public HeapRegionClosure { + size_t _objs; + size_t _bytes; + + class CountObjClosure: public ObjectClosure { + int _n; + public: + CountObjClosure() : _n(0) {} + void do_object(oop obj) { _n++; } + size_t n() { return _n; } + }; + +public: + CountPopObjInfoClosure() : _objs(0), _bytes(0) {} + bool doHeapRegion(HeapRegion* r) { + _bytes += r->used(); + CountObjClosure blk; + r->object_iterate(&blk); + _objs += blk.n(); + return false; + } + size_t objs() { return _objs; } + size_t bytes() { return _bytes; } +}; + + +void G1CollectedHeap::print_popularity_summary_info() const { + CountPopObjInfoClosure blk; + for (int i = 0; i <= _cur_pop_hr_index; i++) { + blk.doHeapRegion(_hrs->at(i)); + } + gclog_or_tty->print_cr("\nPopular objects: %d objs, %d bytes.", + blk.objs(), blk.bytes()); + gclog_or_tty->print_cr(" RC at copy = [avg = %5.2f, max = %5.2f, sd = %5.2f].", + _pop_obj_rc_at_copy.avg(), + _pop_obj_rc_at_copy.maximum(), + _pop_obj_rc_at_copy.sd()); +} + +void G1CollectedHeap::set_refine_cte_cl_concurrency(bool concurrent) { + _refine_cte_cl->set_concurrent(concurrent); +} + +#ifndef PRODUCT + +class PrintHeapRegionClosure: public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion *r) { + gclog_or_tty->print("Region: "PTR_FORMAT":", r); + if (r != NULL) { + if (r->is_on_free_list()) + gclog_or_tty->print("Free "); + if (r->is_young()) + gclog_or_tty->print("Young "); + if (r->isHumongous()) + gclog_or_tty->print("Is Humongous "); + r->print(); + } + return false; + } +}; + +class SortHeapRegionClosure : public HeapRegionClosure { + size_t young_regions,free_regions, unclean_regions; + size_t hum_regions, count; + size_t unaccounted, cur_unclean, cur_alloc; + size_t total_free; + HeapRegion* cur; +public: + SortHeapRegionClosure(HeapRegion *_cur) : cur(_cur), young_regions(0), + free_regions(0), unclean_regions(0), + hum_regions(0), + count(0), unaccounted(0), + cur_alloc(0), total_free(0) + {} + bool doHeapRegion(HeapRegion *r) { + count++; + if (r->is_on_free_list()) free_regions++; + else if (r->is_on_unclean_list()) unclean_regions++; + else if (r->isHumongous()) hum_regions++; + else if (r->is_young()) young_regions++; + else if (r == cur) cur_alloc++; + else unaccounted++; + return false; + } + void print() { + total_free = free_regions + unclean_regions; + 
gclog_or_tty->print("%d regions\n", count); + gclog_or_tty->print("%d free: free_list = %d unclean = %d\n", + total_free, free_regions, unclean_regions); + gclog_or_tty->print("%d humongous %d young\n", + hum_regions, young_regions); + gclog_or_tty->print("%d cur_alloc\n", cur_alloc); + gclog_or_tty->print("UHOH unaccounted = %d\n", unaccounted); + } +}; + +void G1CollectedHeap::print_region_counts() { + SortHeapRegionClosure sc(_cur_alloc_region); + PrintHeapRegionClosure cl; + heap_region_iterate(&cl); + heap_region_iterate(&sc); + sc.print(); + print_region_accounting_info(); +}; + +bool G1CollectedHeap::regions_accounted_for() { + // TODO: regions accounting for young/survivor/tenured + return true; +} + +bool G1CollectedHeap::print_region_accounting_info() { + gclog_or_tty->print_cr("P regions: %d.", G1NumPopularRegions); + gclog_or_tty->print_cr("Free regions: %d (count: %d count list %d) (clean: %d unclean: %d).", + free_regions(), + count_free_regions(), count_free_regions_list(), + _free_region_list_size, _unclean_region_list.sz()); + gclog_or_tty->print_cr("cur_alloc: %d.", + (_cur_alloc_region == NULL ? 0 : 1)); + gclog_or_tty->print_cr("H regions: %d.", _num_humongous_regions); + + // TODO: check regions accounting for young/survivor/tenured + return true; +} + +bool G1CollectedHeap::is_in_closed_subset(const void* p) const { + HeapRegion* hr = heap_region_containing(p); + if (hr == NULL) { + return is_in_permanent(p); + } else { + return hr->is_in(p); + } +} +#endif // PRODUCT + +void G1CollectedHeap::g1_unimplemented() { + // Unimplemented(); +} + + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,1191 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// A "G1CollectedHeap" is an implementation of a java heap for HotSpot. +// It uses the "Garbage First" heap organization and algorithm, which +// may combine concurrent marking with parallel, incremental compaction of +// heap subsets that will yield large amounts of garbage. 
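+// The heap is divided into uniformly-sized HeapRegions. Young, scan-only,
+// and survivor regions are tracked by the YoungList declared below, while
+// free and unclean regions are kept on lists protected by the ZF_mon lock.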
+ +class HeapRegion; +class HeapRegionSeq; +class HeapRegionList; +class PermanentGenerationSpec; +class GenerationSpec; +class OopsInHeapRegionClosure; +class G1ScanHeapEvacClosure; +class ObjectClosure; +class SpaceClosure; +class CompactibleSpaceClosure; +class Space; +class G1CollectorPolicy; +class GenRemSet; +class G1RemSet; +class HeapRegionRemSetIterator; +class ConcurrentMark; +class ConcurrentMarkThread; +class ConcurrentG1Refine; +class ConcurrentZFThread; + +// If want to accumulate detailed statistics on work queues +// turn this on. +#define G1_DETAILED_STATS 0 + +#if G1_DETAILED_STATS +# define IF_G1_DETAILED_STATS(code) code +#else +# define IF_G1_DETAILED_STATS(code) +#endif + +typedef GenericTaskQueue RefToScanQueue; +typedef GenericTaskQueueSet RefToScanQueueSet; + +enum G1GCThreadGroups { + G1CRGroup = 0, + G1ZFGroup = 1, + G1CMGroup = 2, + G1CLGroup = 3 +}; + +enum GCAllocPurpose { + GCAllocForTenured, + GCAllocForSurvived, + GCAllocPurposeCount +}; + +class YoungList : public CHeapObj { +private: + G1CollectedHeap* _g1h; + + HeapRegion* _head; + + HeapRegion* _scan_only_head; + HeapRegion* _scan_only_tail; + size_t _length; + size_t _scan_only_length; + + size_t _last_sampled_rs_lengths; + size_t _sampled_rs_lengths; + HeapRegion* _curr; + HeapRegion* _curr_scan_only; + + HeapRegion* _survivor_head; + HeapRegion* _survivors_tail; + size_t _survivor_length; + + void empty_list(HeapRegion* list); + +public: + YoungList(G1CollectedHeap* g1h); + + void push_region(HeapRegion* hr); + void add_survivor_region(HeapRegion* hr); + HeapRegion* pop_region(); + void empty_list(); + bool is_empty() { return _length == 0; } + size_t length() { return _length; } + size_t scan_only_length() { return _scan_only_length; } + + void rs_length_sampling_init(); + bool rs_length_sampling_more(); + void rs_length_sampling_next(); + + void reset_sampled_info() { + _last_sampled_rs_lengths = 0; + } + size_t sampled_rs_lengths() { return _last_sampled_rs_lengths; } + + // for development purposes + void reset_auxilary_lists(); + HeapRegion* first_region() { return _head; } + HeapRegion* first_scan_only_region() { return _scan_only_head; } + HeapRegion* first_survivor_region() { return _survivor_head; } + HeapRegion* par_get_next_scan_only_region() { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + HeapRegion* ret = _curr_scan_only; + if (ret != NULL) + _curr_scan_only = ret->get_next_young_region(); + return ret; + } + + // debugging + bool check_list_well_formed(); + bool check_list_empty(bool ignore_scan_only_list, + bool check_sample = true); + void print(); +}; + +class RefineCardTableEntryClosure; +class G1CollectedHeap : public SharedHeap { + friend class VM_G1CollectForAllocation; + friend class VM_GenCollectForPermanentAllocation; + friend class VM_G1CollectFull; + friend class VM_G1IncCollectionPause; + friend class VM_G1PopRegionCollectionPause; + friend class VMStructs; + + // Closures used in implementation. + friend class G1ParCopyHelper; + friend class G1IsAliveClosure; + friend class G1EvacuateFollowersClosure; + friend class G1ParScanThreadState; + friend class G1ParScanClosureSuper; + friend class G1ParEvacuateFollowersClosure; + friend class G1ParTask; + friend class G1FreeGarbageRegionClosure; + friend class RefineCardTableEntryClosure; + friend class G1PrepareCompactClosure; + friend class RegionSorter; + friend class CountRCClosure; + friend class EvacPopObjClosure; + + // Other related classes. 
+ friend class G1MarkSweep; + +private: + enum SomePrivateConstants { + VeryLargeInBytes = HeapRegion::GrainBytes/2, + VeryLargeInWords = VeryLargeInBytes/HeapWordSize, + MinHeapDeltaBytes = 10 * HeapRegion::GrainBytes, // FIXME + NumAPIs = HeapRegion::MaxAge + }; + + + // The one and only G1CollectedHeap, so static functions can find it. + static G1CollectedHeap* _g1h; + + // Storage for the G1 heap (excludes the permanent generation). + VirtualSpace _g1_storage; + MemRegion _g1_reserved; + + // The part of _g1_storage that is currently committed. + MemRegion _g1_committed; + + // The maximum part of _g1_storage that has ever been committed. + MemRegion _g1_max_committed; + + // The number of regions that are completely free. + size_t _free_regions; + + // The number of regions we could create by expansion. + size_t _expansion_regions; + + // Return the number of free regions in the heap (by direct counting.) + size_t count_free_regions(); + // Return the number of free regions on the free and unclean lists. + size_t count_free_regions_list(); + + // The block offset table for the G1 heap. + G1BlockOffsetSharedArray* _bot_shared; + + // Move all of the regions off the free lists, then rebuild those free + // lists, before and after full GC. + void tear_down_region_lists(); + void rebuild_region_lists(); + // This sets all non-empty regions to need zero-fill (which they will if + // they are empty after full collection.) + void set_used_regions_to_need_zero_fill(); + + // The sequence of all heap regions in the heap. + HeapRegionSeq* _hrs; + + // The region from which normal-sized objects are currently being + // allocated. May be NULL. + HeapRegion* _cur_alloc_region; + + // Postcondition: cur_alloc_region == NULL. + void abandon_cur_alloc_region(); + + // The to-space memory regions into which objects are being copied during + // a GC. + HeapRegion* _gc_alloc_regions[GCAllocPurposeCount]; + uint _gc_alloc_region_counts[GCAllocPurposeCount]; + + // A list of the regions that have been set to be alloc regions in the + // current collection. + HeapRegion* _gc_alloc_region_list; + + // When called by par thread, require par_alloc_during_gc_lock() to be held. + void push_gc_alloc_region(HeapRegion* hr); + + // This should only be called single-threaded. Undeclares all GC alloc + // regions. + void forget_alloc_region_list(); + + // Should be used to set an alloc region, because there's other + // associated bookkeeping. + void set_gc_alloc_region(int purpose, HeapRegion* r); + + // Check well-formedness of alloc region list. + bool check_gc_alloc_regions(); + + // Outside of GC pauses, the number of bytes used in all regions other + // than the current allocation region. + size_t _summary_bytes_used; + + // Summary information about popular objects; method to print it. + NumberSeq _pop_obj_rc_at_copy; + void print_popularity_summary_info() const; + + unsigned _gc_time_stamp; + + size_t* _surviving_young_words; + + void setup_surviving_young_words(); + void update_surviving_young_words(size_t* surv_young_words); + void cleanup_surviving_young_words(); + +protected: + + // Returns "true" iff none of the gc alloc regions have any allocations + // since the last call to "save_marks". + bool all_alloc_regions_no_allocs_since_save_marks(); + // Calls "note_end_of_copying on all gc alloc_regions. + void all_alloc_regions_note_end_of_copying(); + + // The number of regions allocated to hold humongous objects. 
+ int _num_humongous_regions; + YoungList* _young_list; + + // The current policy object for the collector. + G1CollectorPolicy* _g1_policy; + + // Parallel allocation lock to protect the current allocation region. + Mutex _par_alloc_during_gc_lock; + Mutex* par_alloc_during_gc_lock() { return &_par_alloc_during_gc_lock; } + + // If possible/desirable, allocate a new HeapRegion for normal object + // allocation sufficient for an allocation of the given "word_size". + // If "do_expand" is true, will attempt to expand the heap if necessary + // to satisfy the request. If "zero_filled" is true, requires a + // zero-filled region. + // (Returning NULL will trigger a GC.) + virtual HeapRegion* newAllocRegion_work(size_t word_size, + bool do_expand, + bool zero_filled); + + virtual HeapRegion* newAllocRegion(size_t word_size, + bool zero_filled = true) { + return newAllocRegion_work(word_size, false, zero_filled); + } + virtual HeapRegion* newAllocRegionWithExpansion(int purpose, + size_t word_size, + bool zero_filled = true); + + // Attempt to allocate an object of the given (very large) "word_size". + // Returns "NULL" on failure. + virtual HeapWord* humongousObjAllocate(size_t word_size); + + // If possible, allocate a block of the given word_size, else return "NULL". + // Returning NULL will trigger GC or heap expansion. + // These two methods have rather awkward pre- and + // post-conditions. If they are called outside a safepoint, then + // they assume that the caller is holding the heap lock. Upon return + // they release the heap lock, if they are returning a non-NULL + // value. attempt_allocation_slow() also dirties the cards of a + // newly-allocated young region after it releases the heap + // lock. This change in interface was the neatest way to achieve + // this card dirtying without affecting mem_allocate(), which is a + // more frequently called method. We tried two or three different + // approaches, but they were even more hacky. + HeapWord* attempt_allocation(size_t word_size, + bool permit_collection_pause = true); + + HeapWord* attempt_allocation_slow(size_t word_size, + bool permit_collection_pause = true); + + // Allocate blocks during garbage collection. Will ensure an + // allocation region, either by picking one or expanding the + // heap, and then allocate a block of the given size. The block + // may not be humongous - it must fit into a single heap region. + HeapWord* allocate_during_gc(GCAllocPurpose purpose, size_t word_size); + HeapWord* par_allocate_during_gc(GCAllocPurpose purpose, size_t word_size); + + HeapWord* allocate_during_gc_slow(GCAllocPurpose purpose, + HeapRegion* alloc_region, + bool par, + size_t word_size); + + // Ensure that no further allocations can happen in "r", bearing in mind + // that parallel threads might be attempting allocations. + void par_allocate_remaining_space(HeapRegion* r); + + // Helper function for two callbacks below. + // "full", if true, indicates that the GC is for a System.gc() request, + // and should collect the entire heap. If "clear_all_soft_refs" is true, + // all soft references are cleared during the GC. If "full" is false, + // "word_size" describes the allocation that the GC should + // attempt (at least) to satisfy. + void do_collection(bool full, bool clear_all_soft_refs, + size_t word_size); + + // Callback from VM_G1CollectFull operation. + // Perform a full collection. + void do_full_collection(bool clear_all_soft_refs); + + // Resize the heap if necessary after a full collection.
If this is + // after a collect-for allocation, "word_size" is the allocation size, + // and will be considered part of the used portion of the heap. + void resize_if_necessary_after_full_collection(size_t word_size); + + // Callback from VM_G1CollectForAllocation operation. + // This function does everything necessary/possible to satisfy a + // failed allocation request (including collection, expansion, etc.) + HeapWord* satisfy_failed_allocation(size_t word_size); + + // Attempting to expand the heap sufficiently + // to support an allocation of the given "word_size". If + // successful, perform the allocation and return the address of the + // allocated block, or else "NULL". + virtual HeapWord* expand_and_allocate(size_t word_size); + +public: + // Expand the garbage-first heap by at least the given size (in bytes!). + // (Rounds up to a HeapRegion boundary.) + virtual void expand(size_t expand_bytes); + + // Do anything common to GC's. + virtual void gc_prologue(bool full); + virtual void gc_epilogue(bool full); + +protected: + + // Shrink the garbage-first heap by at most the given size (in bytes!). + // (Rounds down to a HeapRegion boundary.) + virtual void shrink(size_t expand_bytes); + void shrink_helper(size_t expand_bytes); + + // Do an incremental collection: identify a collection set, and evacuate + // its live objects elsewhere. + virtual void do_collection_pause(); + + // The guts of the incremental collection pause, executed by the vm + // thread. If "popular_region" is non-NULL, this pause should evacuate + // this single region whose remembered set has gotten large, moving + // any popular objects to one of the popular regions. + virtual void do_collection_pause_at_safepoint(HeapRegion* popular_region); + + // Actually do the work of evacuating the collection set. + virtual void evacuate_collection_set(); + + // If this is an appropriate right time, do a collection pause. + // The "word_size" argument, if non-zero, indicates the size of an + // allocation request that is prompting this query. + void do_collection_pause_if_appropriate(size_t word_size); + + // The g1 remembered set of the heap. + G1RemSet* _g1_rem_set; + // And it's mod ref barrier set, used to track updates for the above. + ModRefBarrierSet* _mr_bs; + + // The Heap Region Rem Set Iterator. + HeapRegionRemSetIterator** _rem_set_iterator; + + // The closure used to refine a single card. + RefineCardTableEntryClosure* _refine_cte_cl; + + // A function to check the consistency of dirty card logs. + void check_ct_logs_at_safepoint(); + + // After a collection pause, make the regions in the CS into free + // regions. + void free_collection_set(HeapRegion* cs_head); + + // Applies "scan_non_heap_roots" to roots outside the heap, + // "scan_rs" to roots inside the heap (having done "set_region" to + // indicate the region in which the root resides), and does "scan_perm" + // (setting the generation to the perm generation.) If "scan_rs" is + // NULL, then this step is skipped. The "worker_i" + // param is for use with parallel roots processing, and should be + // the "i" of the calling parallel worker thread's work(i) function. + // In the sequential case this param will be ignored. 
+ void g1_process_strong_roots(bool collecting_perm_gen, + SharedHeap::ScanningOption so, + OopClosure* scan_non_heap_roots, + OopsInHeapRegionClosure* scan_rs, + OopsInHeapRegionClosure* scan_so, + OopsInGenClosure* scan_perm, + int worker_i); + + void scan_scan_only_set(OopsInHeapRegionClosure* oc, + int worker_i); + void scan_scan_only_region(HeapRegion* hr, + OopsInHeapRegionClosure* oc, + int worker_i); + + // Apply "blk" to all the weak roots of the system. These include + // JNI weak roots, the code cache, system dictionary, symbol table, + // string table, and referents of reachable weak refs. + void g1_process_weak_roots(OopClosure* root_closure, + OopClosure* non_root_closure); + + // Invoke "save_marks" on all heap regions. + void save_marks(); + + // Free a heap region. + void free_region(HeapRegion* hr); + // A component of "free_region", exposed for 'batching'. + // All the params after "hr" are out params: the used bytes of the freed + // region(s), the number of H regions cleared, the number of regions + // freed, and pointers to the head and tail of a list of freed contig + // regions, linked through the "next_on_unclean_list" field. + void free_region_work(HeapRegion* hr, + size_t& pre_used, + size_t& cleared_h, + size_t& freed_regions, + UncleanRegionList* list, + bool par = false); + + + // The concurrent marker (and the thread it runs in.) + ConcurrentMark* _cm; + ConcurrentMarkThread* _cmThread; + bool _mark_in_progress; + + // The concurrent refiner. + ConcurrentG1Refine* _cg1r; + + // The concurrent zero-fill thread. + ConcurrentZFThread* _czft; + + // The parallel task queues + RefToScanQueueSet *_task_queues; + + // True iff an evacuation has failed in the current collection. + bool _evacuation_failed; + + // Set the attribute indicating whether evacuation has failed in the + // current collection. + void set_evacuation_failed(bool b) { _evacuation_failed = b; } + + // Failed evacuations cause some logical from-space objects to have + // forwarding pointers to themselves. Reset them. + void remove_self_forwarding_pointers(); + + // When one is non-null, so is the other. Together, each pair is + // an object with a preserved mark, and its mark value. + GrowableArray<oop>* _objs_with_preserved_marks; + GrowableArray<markOop>* _preserved_marks_of_objs; + + // Preserve the mark of "obj", if necessary, in preparation for its mark + // word being overwritten with a self-forwarding-pointer. + void preserve_mark_if_necessary(oop obj, markOop m); + + // The stack of evac-failure objects left to be scanned. + GrowableArray<oop>* _evac_failure_scan_stack; + // The closure to apply to evac-failure objects. + + OopsInHeapRegionClosure* _evac_failure_closure; + // Set the field above. + void + set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_closure) { + _evac_failure_closure = evac_failure_closure; + } + + // Push "obj" on the scan stack. + void push_on_evac_failure_scan_stack(oop obj); + // Process scan stack entries until the stack is empty. + void drain_evac_failure_scan_stack(); + // True iff an invocation of "drain_scan_stack" is in progress; to + // prevent unnecessary recursion. + bool _drain_in_progress; + + // Do any necessary initialization for evacuation-failure handling. + // "cl" is the closure that will be used to process evac-failure + // objects. + void init_for_evac_failure(OopsInHeapRegionClosure* cl); + // Do any necessary cleanup for evacuation-failure handling data + // structures.
+ void finalize_for_evac_failure(); + + // An attempt to evacuate "obj" has failed; take necessary steps. + void handle_evacuation_failure(oop obj); + oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj); + void handle_evacuation_failure_common(oop obj, markOop m); + + + // Ensure that the relevant gc_alloc regions are set. + void get_gc_alloc_regions(); + // We're done with GC alloc regions; release them, as appropriate. + void release_gc_alloc_regions(); + + // ("Weak") Reference processing support + ReferenceProcessor* _ref_processor; + + enum G1H_process_strong_roots_tasks { + G1H_PS_mark_stack_oops_do, + G1H_PS_refProcessor_oops_do, + // Leave this one last. + G1H_PS_NumElements + }; + + SubTasksDone* _process_strong_tasks; + + // Allocate space to hold a popular object. Result is guaranteed below + // "popular_object_boundary()". Note: CURRENTLY halts the system if we + // run out of space to hold popular objects. + HeapWord* allocate_popular_object(size_t word_size); + + // The boundary between popular and non-popular objects. + HeapWord* _popular_object_boundary; + + HeapRegionList* _popular_regions_to_be_evacuated; + + // Compute which objects in "single_region" are popular. If any are, + // evacuate them to a popular region, leaving behind forwarding pointers, + // and select "popular_region" as the single collection set region. + // Otherwise, leave the collection set null. + void popularity_pause_preamble(HeapRegion* populer_region); + + // Compute which objects in "single_region" are popular, and evacuate + // them to a popular region, leaving behind forwarding pointers. + // Returns "true" if at least one popular object is discovered and + // evacuated. In any case, "*max_rc" is set to the maximum reference + // count of an object in the region. + bool compute_reference_counts_and_evac_popular(HeapRegion* populer_region, + size_t* max_rc); + // Subroutines used in the above. + bool _rc_region_above; + size_t _rc_region_diff; + jint* obj_rc_addr(oop obj) { + uintptr_t obj_addr = (uintptr_t)obj; + if (_rc_region_above) { + jint* res = (jint*)(obj_addr + _rc_region_diff); + assert((uintptr_t)res > obj_addr, "RC region is above."); + return res; + } else { + jint* res = (jint*)(obj_addr - _rc_region_diff); + assert((uintptr_t)res < obj_addr, "RC region is below."); + return res; + } + } + jint obj_rc(oop obj) { + return *obj_rc_addr(obj); + } + void inc_obj_rc(oop obj) { + (*obj_rc_addr(obj))++; + } + void atomic_inc_obj_rc(oop obj); + + + // Number of popular objects and bytes (latter is cheaper!). + size_t pop_object_used_objs(); + size_t pop_object_used_bytes(); + + // Index of the popular region in which allocation is currently being + // done. + int _cur_pop_hr_index; + + // List of regions which require zero filling. + UncleanRegionList _unclean_region_list; + bool _unclean_regions_coming; + + bool check_age_cohort_well_formed_work(int a, HeapRegion* hr); + +public: + void set_refine_cte_cl_concurrency(bool concurrent); + + RefToScanQueue *task_queue(int i); + + // Create a G1CollectedHeap with the specified policy. + // Must call the initialize method afterwards. + // May not return if something goes wrong. + G1CollectedHeap(G1CollectorPolicy* policy); + + // Initialize the G1CollectedHeap to have the initial and + // maximum sizes, permanent generation, and remembered and barrier sets + // specified by the policy object. 
+ jint initialize(); + + void ref_processing_init(); + + void set_par_threads(int t) { + SharedHeap::set_par_threads(t); + _process_strong_tasks->set_par_threads(t); + } + + virtual CollectedHeap::Name kind() const { + return CollectedHeap::G1CollectedHeap; + } + + // The current policy object for the collector. + G1CollectorPolicy* g1_policy() const { return _g1_policy; } + + // Adaptive size policy. No such thing for g1. + virtual AdaptiveSizePolicy* size_policy() { return NULL; } + + // The rem set and barrier set. + G1RemSet* g1_rem_set() const { return _g1_rem_set; } + ModRefBarrierSet* mr_bs() const { return _mr_bs; } + + // The rem set iterator. + HeapRegionRemSetIterator* rem_set_iterator(int i) { + return _rem_set_iterator[i]; + } + + HeapRegionRemSetIterator* rem_set_iterator() { + return _rem_set_iterator[0]; + } + + unsigned get_gc_time_stamp() { + return _gc_time_stamp; + } + + void reset_gc_time_stamp() { + _gc_time_stamp = 0; + } + + void iterate_dirty_card_closure(bool concurrent, int worker_i); + + // The shared block offset table array. + G1BlockOffsetSharedArray* bot_shared() const { return _bot_shared; } + + // Reference Processing accessor + ReferenceProcessor* ref_processor() { return _ref_processor; } + + // Reserved (g1 only; super method includes perm), capacity and the used + // portion in bytes. + size_t g1_reserved_obj_bytes() { return _g1_reserved.byte_size(); } + virtual size_t capacity() const; + virtual size_t used() const; + size_t recalculate_used() const; +#ifndef PRODUCT + size_t recalculate_used_regions() const; +#endif // PRODUCT + + // These virtual functions do the actual allocation. + virtual HeapWord* mem_allocate(size_t word_size, + bool is_noref, + bool is_tlab, + bool* gc_overhead_limit_was_exceeded); + + // Some heaps may offer a contiguous region for shared non-blocking + // allocation, via inlined code (by exporting the address of the top and + // end fields defining the extent of the contiguous allocation region.) + // But G1CollectedHeap doesn't yet support this. + + // Return an estimate of the maximum allocation that could be performed + // without triggering any collection or expansion activity. In a + // generational collector, for example, this is probably the largest + // allocation that could be supported (without expansion) in the youngest + // generation. It is "unsafe" because no locks are taken; the result + // should be treated as an approximation, not a guarantee, for use in + // heuristic resizing decisions. + virtual size_t unsafe_max_alloc(); + + virtual bool is_maximal_no_gc() const { + return _g1_storage.uncommitted_size() == 0; + } + + // The total number of regions in the heap. + size_t n_regions(); + + // The number of regions that are completely free. + size_t max_regions(); + + // The number of regions that are completely free. + size_t free_regions(); + + // The number of regions that are not completely free. + size_t used_regions() { return n_regions() - free_regions(); } + + // True iff the ZF thread should run. + bool should_zf(); + + // The number of regions available for "regular" expansion. 
+ size_t expansion_regions() { return _expansion_regions; } + +#ifndef PRODUCT + bool regions_accounted_for(); + bool print_region_accounting_info(); + void print_region_counts(); +#endif + + HeapRegion* alloc_region_from_unclean_list(bool zero_filled); + HeapRegion* alloc_region_from_unclean_list_locked(bool zero_filled); + + void put_region_on_unclean_list(HeapRegion* r); + void put_region_on_unclean_list_locked(HeapRegion* r); + + void prepend_region_list_on_unclean_list(UncleanRegionList* list); + void prepend_region_list_on_unclean_list_locked(UncleanRegionList* list); + + void set_unclean_regions_coming(bool b); + void set_unclean_regions_coming_locked(bool b); + // Wait for cleanup to be complete. + void wait_for_cleanup_complete(); + // Like above, but assumes that the calling thread owns the Heap_lock. + void wait_for_cleanup_complete_locked(); + + // Return the head of the unclean list. + HeapRegion* peek_unclean_region_list_locked(); + // Remove and return the head of the unclean list. + HeapRegion* pop_unclean_region_list_locked(); + + // List of regions which are zero filled and ready for allocation. + HeapRegion* _free_region_list; + // Number of elements on the free list. + size_t _free_region_list_size; + + // If the head of the unclean list is ZeroFilled, move it to the free + // list. + bool move_cleaned_region_to_free_list_locked(); + bool move_cleaned_region_to_free_list(); + + void put_free_region_on_list_locked(HeapRegion* r); + void put_free_region_on_list(HeapRegion* r); + + // Remove and return the head element of the free list. + HeapRegion* pop_free_region_list_locked(); + + // If "zero_filled" is true, we first try the free list, then we try the + // unclean list, zero-filling the result. If "zero_filled" is false, we + // first try the unclean list, then the zero-filled list. + HeapRegion* alloc_free_region_from_lists(bool zero_filled); + + // Verify the integrity of the region lists. + void remove_allocated_regions_from_lists(); + bool verify_region_lists(); + bool verify_region_lists_locked(); + size_t unclean_region_list_length(); + size_t free_region_list_length(); + + // Perform a collection of the heap; intended for use in implementing + // "System.gc". This probably implies as full a collection as the + // "CollectedHeap" supports. + virtual void collect(GCCause::Cause cause); + + // The same as above but assume that the caller holds the Heap_lock. + void collect_locked(GCCause::Cause cause); + + // This interface assumes that it's being called by the + // vm thread. It collects the heap assuming that the + // heap lock is already held and that we are executing in + // the context of the vm thread. + virtual void collect_as_vm_thread(GCCause::Cause cause); + + // True iff a evacuation has failed in the most-recent collection. + bool evacuation_failed() { return _evacuation_failed; } + + // Free a region if it is totally full of garbage. Returns the number of + // bytes freed (0 ==> didn't free it). + size_t free_region_if_totally_empty(HeapRegion *hr); + void free_region_if_totally_empty_work(HeapRegion *hr, + size_t& pre_used, + size_t& cleared_h_regions, + size_t& freed_regions, + UncleanRegionList* list, + bool par = false); + + // If we've done free region work that yields the given changes, update + // the relevant global variables. + void finish_free_region_work(size_t pre_used, + size_t cleared_h_regions, + size_t freed_regions, + UncleanRegionList* list); + + + // Returns "TRUE" iff "p" points into the allocated area of the heap. 
+ virtual bool is_in(const void* p) const; + + // Return "TRUE" iff the given object address is within the collection + // set. + inline bool obj_in_cs(oop obj); + + // Return "TRUE" iff the given object address is in the reserved + // region of g1 (excluding the permanent generation). + bool is_in_g1_reserved(const void* p) const { + return _g1_reserved.contains(p); + } + + // Returns a MemRegion that corresponds to the space that has been + // committed in the heap + MemRegion g1_committed() { + return _g1_committed; + } + + NOT_PRODUCT( bool is_in_closed_subset(const void* p) const; ) + + // Dirty card table entries covering a list of young regions. + void dirtyCardsForYoungRegions(CardTableModRefBS* ct_bs, HeapRegion* list); + + // This resets the card table to all zeros. It is used after + // a collection pause which used the card table to claim cards. + void cleanUpCardTable(); + + // Iteration functions. + + // Iterate over all the ref-containing fields of all objects, calling + // "cl.do_oop" on each. + virtual void oop_iterate(OopClosure* cl); + + // Same as above, restricted to a memory region. + virtual void oop_iterate(MemRegion mr, OopClosure* cl); + + // Iterate over all objects, calling "cl.do_object" on each. + virtual void object_iterate(ObjectClosure* cl); + + // Iterate over all objects allocated since the last collection, calling + // "cl.do_object" on each. The heap must have been initialized properly + // to support this function, or else this call will fail. + virtual void object_iterate_since_last_GC(ObjectClosure* cl); + + // Iterate over all spaces in use in the heap, in ascending address order. + virtual void space_iterate(SpaceClosure* cl); + + // Iterate over heap regions, in address order, terminating the + // iteration early if the "doHeapRegion" method returns "true". + void heap_region_iterate(HeapRegionClosure* blk); + + // Iterate over heap regions starting with r (or the first region if "r" + // is NULL), in address order, terminating early if the "doHeapRegion" + // method returns "true". + void heap_region_iterate_from(HeapRegion* r, HeapRegionClosure* blk); + + // As above but starting from the region at index idx. + void heap_region_iterate_from(int idx, HeapRegionClosure* blk); + + HeapRegion* region_at(size_t idx); + + + // Divide the heap region sequence into "chunks" of some size (the number + // of regions divided by the number of parallel threads times some + // overpartition factor, currently 4). Assumes that this will be called + // in parallel by ParallelGCThreads worker threads with discinct worker + // ids in the range [0..max(ParallelGCThreads-1, 1)], that all parallel + // calls will use the same "claim_value", and that that claim value is + // different from the claim_value of any heap region before the start of + // the iteration. Applies "blk->doHeapRegion" to each of the regions, by + // attempting to claim the first region in each chunk, and, if + // successful, applying the closure to each region in the chunk (and + // setting the claim value of the second and subsequent regions of the + // chunk.) For now requires that "doHeapRegion" always returns "false", + // i.e., that a closure never attempt to abort a traversal. + void heap_region_par_iterate_chunked(HeapRegionClosure* blk, + int worker, + jint claim_value); + + // Iterate over the regions (if any) in the current collection set. 
+ void collection_set_iterate(HeapRegionClosure* blk); + + // As above but starting from region r + void collection_set_iterate_from(HeapRegion* r, HeapRegionClosure *blk); + + // Returns the first (lowest address) compactible space in the heap. + virtual CompactibleSpace* first_compactible_space(); + + // A CollectedHeap will contain some number of spaces. This finds the + // space containing a given address, or else returns NULL. + virtual Space* space_containing(const void* addr) const; + + // A G1CollectedHeap will contain some number of heap regions. This + // finds the region containing a given address, or else returns NULL. + HeapRegion* heap_region_containing(const void* addr) const; + + // Like the above, but requires "addr" to be in the heap (to avoid a + // null-check), and unlike the above, may return an continuing humongous + // region. + HeapRegion* heap_region_containing_raw(const void* addr) const; + + // A CollectedHeap is divided into a dense sequence of "blocks"; that is, + // each address in the (reserved) heap is a member of exactly + // one block. The defining characteristic of a block is that it is + // possible to find its size, and thus to progress forward to the next + // block. (Blocks may be of different sizes.) Thus, blocks may + // represent Java objects, or they might be free blocks in a + // free-list-based heap (or subheap), as long as the two kinds are + // distinguishable and the size of each is determinable. + + // Returns the address of the start of the "block" that contains the + // address "addr". We say "blocks" instead of "object" since some heaps + // may not pack objects densely; a chunk may either be an object or a + // non-object. + virtual HeapWord* block_start(const void* addr) const; + + // Requires "addr" to be the start of a chunk, and returns its size. + // "addr + size" is required to be the start of a new chunk, or the end + // of the active area of the heap. + virtual size_t block_size(const HeapWord* addr) const; + + // Requires "addr" to be the start of a block, and returns "TRUE" iff + // the block is an object. + virtual bool block_is_obj(const HeapWord* addr) const; + + // Does this heap support heap inspection? (+PrintClassHistogram) + virtual bool supports_heap_inspection() const { return true; } + + // Section on thread-local allocation buffers (TLABs) + // See CollectedHeap for semantics. + + virtual bool supports_tlab_allocation() const; + virtual size_t tlab_capacity(Thread* thr) const; + virtual size_t unsafe_max_tlab_alloc(Thread* thr) const; + virtual HeapWord* allocate_new_tlab(size_t size); + + // Can a compiler initialize a new object without store barriers? + // This permission only extends from the creation of a new object + // via a TLAB up to the first subsequent safepoint. + virtual bool can_elide_tlab_store_barriers() const { + // Since G1's TLAB's may, on occasion, come from non-young regions + // as well. (Is there a flag controlling that? XXX) + return false; + } + + // Can a compiler elide a store barrier when it writes + // a permanent oop into the heap? Applies when the compiler + // is storing x to the heap, where x->is_perm() is true. + virtual bool can_elide_permanent_oop_store_barriers() const { + // At least until perm gen collection is also G1-ified, at + // which point this should return false. + return true; + } + + virtual bool allocs_are_zero_filled(); + + // The boundary between a "large" and "small" array of primitives, in + // words. 
+ virtual size_t large_typearray_limit(); + + // All popular objects are guaranteed to have addresses below this + // boundary. + HeapWord* popular_object_boundary() { + return _popular_object_boundary; + } + + // Declare the region as one that should be evacuated because its + // remembered set is too large. + void schedule_popular_region_evac(HeapRegion* r); + // If there is a popular region to evacuate it, remove it from the list + // and return it. + HeapRegion* popular_region_to_evac(); + // Evacuate the given popular region. + void evac_popular_region(HeapRegion* r); + + // Returns "true" iff the given word_size is "very large". + static bool isHumongous(size_t word_size) { + return word_size >= VeryLargeInWords; + } + + // Update mod union table with the set of dirty cards. + void updateModUnion(); + + // Set the mod union bits corresponding to the given memRegion. Note + // that this is always a safe operation, since it doesn't clear any + // bits. + void markModUnionRange(MemRegion mr); + + // Records the fact that a marking phase is no longer in progress. + void set_marking_complete() { + _mark_in_progress = false; + } + void set_marking_started() { + _mark_in_progress = true; + } + bool mark_in_progress() { + return _mark_in_progress; + } + + // Print the maximum heap capacity. + virtual size_t max_capacity() const; + + virtual jlong millis_since_last_gc(); + + // Perform any cleanup actions necessary before allowing a verification. + virtual void prepare_for_verify(); + + // Perform verification. + virtual void verify(bool allow_dirty, bool silent); + virtual void print() const; + virtual void print_on(outputStream* st) const; + + virtual void print_gc_threads_on(outputStream* st) const; + virtual void gc_threads_do(ThreadClosure* tc) const; + + // Override + void print_tracing_info() const; + + // If "addr" is a pointer into the (reserved?) heap, returns a positive + // number indicating the "arena" within the heap in which "addr" falls. + // Or else returns 0. + virtual int addr_to_arena_id(void* addr) const; + + // Convenience function to be used in situations where the heap type can be + // asserted to be this type. + static G1CollectedHeap* heap(); + + void empty_young_list(); + bool should_set_young_locked(); + + void set_region_short_lived_locked(HeapRegion* hr); + // add appropriate methods for any other surv rate groups + + void young_list_rs_length_sampling_init() { + _young_list->rs_length_sampling_init(); + } + bool young_list_rs_length_sampling_more() { + return _young_list->rs_length_sampling_more(); + } + void young_list_rs_length_sampling_next() { + _young_list->rs_length_sampling_next(); + } + size_t young_list_sampled_rs_lengths() { + return _young_list->sampled_rs_lengths(); + } + + size_t young_list_length() { return _young_list->length(); } + size_t young_list_scan_only_length() { + return _young_list->scan_only_length(); } + + HeapRegion* pop_region_from_young_list() { + return _young_list->pop_region(); + } + + HeapRegion* young_list_first_region() { + return _young_list->first_region(); + } + + // debugging + bool check_young_list_well_formed() { + return _young_list->check_list_well_formed(); + } + bool check_young_list_empty(bool ignore_scan_only_list, + bool check_sample = true); + + // *** Stuff related to concurrent marking. It's not clear to me that so + // many of these need to be public. + + // The functions below are helper functions that a subclass of + // "CollectedHeap" can use in the implementation of its virtual + // functions. 
+ // This performs a concurrent marking of the live objects in a + // bitmap off to the side. + void doConcurrentMark(); + + // This is called from the marksweep collector which then does + // a concurrent mark and verifies that the results agree with + // the stop the world marking. + void checkConcurrentMark(); + void do_sync_mark(); + + bool isMarkedPrev(oop obj) const; + bool isMarkedNext(oop obj) const; + + // Determine if an object is dead, given the object and also + // the region to which the object belongs. An object is dead + // iff a) it was not allocated since the last mark and b) it + // is not marked. + + bool is_obj_dead(const oop obj, const HeapRegion* hr) const { + return + !hr->obj_allocated_since_prev_marking(obj) && + !isMarkedPrev(obj); + } + + // This is used when copying an object to survivor space. + // If the object is marked live, then we mark the copy live. + // If the object is allocated since the start of this mark + // cycle, then we mark the copy live. + // If the object has been around since the previous mark + // phase, and hasn't been marked yet during this phase, + // then we don't mark it, we just wait for the + // current marking cycle to get to it. + + // This function returns true when an object has been + // around since the previous marking and hasn't yet + // been marked during this marking. + + bool is_obj_ill(const oop obj, const HeapRegion* hr) const { + return + !hr->obj_allocated_since_next_marking(obj) && + !isMarkedNext(obj); + } + + // Determine if an object is dead, given only the object itself. + // This will find the region to which the object belongs and + // then call the region version of the same function. + + // Added if it is in permanent gen it isn't dead. + // Added if it is NULL it isn't dead. + + bool is_obj_dead(oop obj) { + HeapRegion* hr = heap_region_containing(obj); + if (hr == NULL) { + if (Universe::heap()->is_in_permanent(obj)) + return false; + else if (obj == NULL) return false; + else return true; + } + else return is_obj_dead(obj, hr); + } + + bool is_obj_ill(oop obj) { + HeapRegion* hr = heap_region_containing(obj); + if (hr == NULL) { + if (Universe::heap()->is_in_permanent(obj)) + return false; + else if (obj == NULL) return false; + else return true; + } + else return is_obj_ill(obj, hr); + } + + // The following is just to alert the verification code + // that a full collection has occurred and that the + // remembered sets are no longer up to date. + bool _full_collection; + void set_full_collection() { _full_collection = true;} + void clear_full_collection() {_full_collection = false;} + bool full_collection() {return _full_collection;} + + ConcurrentMark* concurrent_mark() const { return _cm; } + ConcurrentG1Refine* concurrent_g1_refine() const { return _cg1r; } + +public: + void stop_conc_gc_threads(); + + // + + double predict_region_elapsed_time_ms(HeapRegion* hr, bool young); + void check_if_region_is_too_expensive(double predicted_time_ms); + size_t pending_card_num(); + size_t max_pending_card_num(); + size_t cards_scanned(); + + // + +protected: + size_t _max_heap_capacity; + +// debug_only(static void check_for_valid_allocation_state();) + +public: + // Temporary: call to mark things unimplemented for the G1 heap (e.g., + // MemoryService). In productization, we can make this assert false + // to catch such places (as well as searching for calls to this...) 
+ static void g1_unimplemented(); + +}; + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,91 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Inline functions for G1CollectedHeap + +inline HeapRegion* +G1CollectedHeap::heap_region_containing(const void* addr) const { + HeapRegion* hr = _hrs->addr_to_region(addr); + // hr can be null if addr in perm_gen + if (hr != NULL && hr->continuesHumongous()) { + hr = hr->humongous_start_region(); + } + return hr; +} + +inline HeapRegion* +G1CollectedHeap::heap_region_containing_raw(const void* addr) const { + HeapRegion* res = _hrs->addr_to_region(addr); + assert(res != NULL, "addr outside of heap?"); + return res; +} + +inline bool G1CollectedHeap::obj_in_cs(oop obj) { + HeapRegion* r = _hrs->addr_to_region(obj); + return r != NULL && r->in_collection_set(); +} + +inline HeapWord* G1CollectedHeap::attempt_allocation(size_t word_size, + bool permit_collection_pause) { + HeapWord* res = NULL; + + assert( SafepointSynchronize::is_at_safepoint() || + Heap_lock->owned_by_self(), "pre-condition of the call" ); + + if (_cur_alloc_region != NULL) { + + // If this allocation causes a region to become non empty, + // then we need to update our free_regions count. + + if (_cur_alloc_region->is_empty()) { + res = _cur_alloc_region->allocate(word_size); + if (res != NULL) + _free_regions--; + } else { + res = _cur_alloc_region->allocate(word_size); + } + } + if (res != NULL) { + if (!SafepointSynchronize::is_at_safepoint()) { + assert( Heap_lock->owned_by_self(), "invariant" ); + Heap_lock->unlock(); + } + return res; + } + // attempt_allocation_slow will also unlock the heap lock when appropriate. 
+ return attempt_allocation_slow(word_size, permit_collection_pause); +} + +inline RefToScanQueue* G1CollectedHeap::task_queue(int i) { + return _task_queues->queue(i); +} + + +inline bool G1CollectedHeap::isMarkedPrev(oop obj) const { + return _cm->prevMarkBitMap()->isMarked((HeapWord *)obj); +} + +inline bool G1CollectedHeap::isMarkedNext(oop obj) const { + return _cm->nextMarkBitMap()->isMarked((HeapWord *)obj); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,3159 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1CollectorPolicy.cpp.incl" + +#define PREDICTIONS_VERBOSE 0 + +// + +// Different defaults for different number of GC threads +// They were chosen by running GCOld and SPECjbb on debris with different +// numbers of GC threads and choosing them based on the results + +// all the same +static double rs_length_diff_defaults[] = { + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 +}; + +static double cost_per_card_ms_defaults[] = { + 0.01, 0.005, 0.005, 0.003, 0.003, 0.002, 0.002, 0.0015 +}; + +static double cost_per_scan_only_region_ms_defaults[] = { + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 +}; + +// all the same +static double fully_young_cards_per_entry_ratio_defaults[] = { + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 +}; + +static double cost_per_entry_ms_defaults[] = { + 0.015, 0.01, 0.01, 0.008, 0.008, 0.0055, 0.0055, 0.005 +}; + +static double cost_per_byte_ms_defaults[] = { + 0.00006, 0.00003, 0.00003, 0.000015, 0.000015, 0.00001, 0.00001, 0.000009 +}; + +// these should be pretty consistent +static double constant_other_time_ms_defaults[] = { + 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0 +}; + + +static double young_other_cost_per_region_ms_defaults[] = { + 0.3, 0.2, 0.2, 0.15, 0.15, 0.12, 0.12, 0.1 +}; + +static double non_young_other_cost_per_region_ms_defaults[] = { + 1.0, 0.7, 0.7, 0.5, 0.5, 0.42, 0.42, 0.30 +}; + +// + +G1CollectorPolicy::G1CollectorPolicy() : + _parallel_gc_threads((ParallelGCThreads > 0) ? 
ParallelGCThreads : 1), + _n_pauses(0), + _recent_CH_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_G1_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_evac_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_pause_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_rs_sizes(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _all_pause_times_ms(new NumberSeq()), + _stop_world_start(0.0), + _all_stop_world_times_ms(new NumberSeq()), + _all_yield_times_ms(new NumberSeq()), + + _all_mod_union_times_ms(new NumberSeq()), + + _non_pop_summary(new NonPopSummary()), + _pop_summary(new PopSummary()), + _non_pop_abandoned_summary(new NonPopAbandonedSummary()), + _pop_abandoned_summary(new PopAbandonedSummary()), + + _cur_clear_ct_time_ms(0.0), + + _region_num_young(0), + _region_num_tenured(0), + _prev_region_num_young(0), + _prev_region_num_tenured(0), + + _aux_num(10), + _all_aux_times_ms(new NumberSeq[_aux_num]), + _cur_aux_start_times_ms(new double[_aux_num]), + _cur_aux_times_ms(new double[_aux_num]), + _cur_aux_times_set(new bool[_aux_num]), + + _pop_compute_rc_start(0.0), + _pop_evac_start(0.0), + + _concurrent_mark_init_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _concurrent_mark_remark_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _concurrent_mark_cleanup_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + + // + + _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _prev_collection_pause_end_ms(0.0), + _pending_card_diff_seq(new TruncatedSeq(TruncatedSeqLength)), + _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_scan_only_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _fully_young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), + _partially_young_cards_per_entry_ratio_seq( + new TruncatedSeq(TruncatedSeqLength)), + _cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _partially_young_cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_byte_ms_during_cm_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_scan_only_region_ms_during_cm_seq(new TruncatedSeq(TruncatedSeqLength)), + _constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _non_young_other_cost_per_region_ms_seq( + new TruncatedSeq(TruncatedSeqLength)), + + _pending_cards_seq(new TruncatedSeq(TruncatedSeqLength)), + _scanned_cards_seq(new TruncatedSeq(TruncatedSeqLength)), + _rs_lengths_seq(new TruncatedSeq(TruncatedSeqLength)), + + _pause_time_target_ms((double) G1MaxPauseTimeMS), + + // + + _in_young_gc_mode(false), + _full_young_gcs(true), + _full_young_pause_num(0), + _partial_young_pause_num(0), + + _during_marking(false), + _in_marking_window(false), + _in_marking_window_im(false), + + _known_garbage_ratio(0.0), + _known_garbage_bytes(0), + + _young_gc_eff_seq(new TruncatedSeq(TruncatedSeqLength)), + _target_pause_time_ms(-1.0), + + _recent_prev_end_times_for_all_gcs_sec(new TruncatedSeq(NumPrevPausesForHeuristics)), + + _recent_CS_bytes_used_before(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_CS_bytes_surviving(new TruncatedSeq(NumPrevPausesForHeuristics)), + + _recent_avg_pause_time_ratio(0.0), + _num_markings(0), + 
_n_marks(0), + _n_pauses_at_mark_end(0), + + _all_full_gc_times_ms(new NumberSeq()), + + _conc_refine_enabled(0), + _conc_refine_zero_traversals(0), + _conc_refine_max_traversals(0), + _conc_refine_current_delta(G1ConcRefineInitialDelta), + + // G1PausesBtwnConcMark defaults to -1 + // so the hack is to do the cast QQQ FIXME + _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark), + _n_marks_since_last_pause(0), + _conc_mark_initiated(false), + _should_initiate_conc_mark(false), + _should_revert_to_full_young_gcs(false), + _last_full_young_gc(false), + + _prev_collection_pause_used_at_end_bytes(0), + + _collection_set(NULL), +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list +#endif // _MSC_VER + + _short_lived_surv_rate_group(new SurvRateGroup(this, "Short Lived", + G1YoungSurvRateNumRegionsSummary)), + _survivor_surv_rate_group(new SurvRateGroup(this, "Survivor", + G1YoungSurvRateNumRegionsSummary)) + // add here any more surv rate groups +{ + _recent_prev_end_times_for_all_gcs_sec->add(os::elapsedTime()); + _prev_collection_pause_end_ms = os::elapsedTime() * 1000.0; + + _par_last_ext_root_scan_times_ms = new double[_parallel_gc_threads]; + _par_last_mark_stack_scan_times_ms = new double[_parallel_gc_threads]; + _par_last_scan_only_times_ms = new double[_parallel_gc_threads]; + _par_last_scan_only_regions_scanned = new double[_parallel_gc_threads]; + + _par_last_update_rs_start_times_ms = new double[_parallel_gc_threads]; + _par_last_update_rs_times_ms = new double[_parallel_gc_threads]; + _par_last_update_rs_processed_buffers = new double[_parallel_gc_threads]; + + _par_last_scan_rs_start_times_ms = new double[_parallel_gc_threads]; + _par_last_scan_rs_times_ms = new double[_parallel_gc_threads]; + _par_last_scan_new_refs_times_ms = new double[_parallel_gc_threads]; + + _par_last_obj_copy_times_ms = new double[_parallel_gc_threads]; + + _par_last_termination_times_ms = new double[_parallel_gc_threads]; + + // we store the data from the first pass during popularity pauses + _pop_par_last_update_rs_start_times_ms = new double[_parallel_gc_threads]; + _pop_par_last_update_rs_times_ms = new double[_parallel_gc_threads]; + _pop_par_last_update_rs_processed_buffers = new double[_parallel_gc_threads]; + + _pop_par_last_scan_rs_start_times_ms = new double[_parallel_gc_threads]; + _pop_par_last_scan_rs_times_ms = new double[_parallel_gc_threads]; + + _pop_par_last_closure_app_times_ms = new double[_parallel_gc_threads]; + + // start conservatively + _expensive_region_limit_ms = 0.5 * (double) G1MaxPauseTimeMS; + + // + + int index; + if (ParallelGCThreads == 0) + index = 0; + else if (ParallelGCThreads > 8) + index = 7; + else + index = ParallelGCThreads - 1; + + _pending_card_diff_seq->add(0.0); + _rs_length_diff_seq->add(rs_length_diff_defaults[index]); + _cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]); + _cost_per_scan_only_region_ms_seq->add( + cost_per_scan_only_region_ms_defaults[index]); + _fully_young_cards_per_entry_ratio_seq->add( + fully_young_cards_per_entry_ratio_defaults[index]); + _cost_per_entry_ms_seq->add(cost_per_entry_ms_defaults[index]); + _cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]); + _constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]); + _young_other_cost_per_region_ms_seq->add( + young_other_cost_per_region_ms_defaults[index]); + _non_young_other_cost_per_region_ms_seq->add( + 
non_young_other_cost_per_region_ms_defaults[index]); + + // + + double time_slice = (double) G1TimeSliceMS / 1000.0; + double max_gc_time = (double) G1MaxPauseTimeMS / 1000.0; + guarantee(max_gc_time < time_slice, + "Max GC time should not be greater than the time slice"); + _mmu_tracker = new G1MMUTrackerQueue(time_slice, max_gc_time); + _sigma = (double) G1ConfidencePerc / 100.0; + + // start conservatively (around 50ms is about right) + _concurrent_mark_init_times_ms->add(0.05); + _concurrent_mark_remark_times_ms->add(0.05); + _concurrent_mark_cleanup_times_ms->add(0.20); + _tenuring_threshold = MaxTenuringThreshold; + + initialize_all(); +} + +// Increment "i", mod "len" +static void inc_mod(int& i, int len) { + i++; if (i == len) i = 0; +} + +void G1CollectorPolicy::initialize_flags() { + set_min_alignment(HeapRegion::GrainBytes); + set_max_alignment(GenRemSet::max_alignment_constraint(rem_set_name())); + CollectorPolicy::initialize_flags(); +} + +void G1CollectorPolicy::init() { + // Set aside an initial future to_space. + _g1 = G1CollectedHeap::heap(); + size_t regions = Universe::heap()->capacity() / HeapRegion::GrainBytes; + + assert(Heap_lock->owned_by_self(), "Locking discipline."); + + if (G1SteadyStateUsed < 50) { + vm_exit_during_initialization("G1SteadyStateUsed must be at least 50%."); + } + if (UseConcMarkSweepGC) { + vm_exit_during_initialization("-XX:+UseG1GC is incompatible with " + "-XX:+UseConcMarkSweepGC."); + } + + if (G1Gen) { + _in_young_gc_mode = true; + + if (G1YoungGenSize == 0) { + set_adaptive_young_list_length(true); + _young_list_fixed_length = 0; + } else { + set_adaptive_young_list_length(false); + _young_list_fixed_length = (G1YoungGenSize / HeapRegion::GrainBytes); + } + _free_regions_at_end_of_collection = _g1->free_regions(); + _scan_only_regions_at_end_of_collection = 0; + calculate_young_list_min_length(); + guarantee( _young_list_min_length == 0, "invariant, not enough info" ); + calculate_young_list_target_config(); + } else { + _young_list_fixed_length = 0; + _in_young_gc_mode = false; + } +} + +void G1CollectorPolicy::calculate_young_list_min_length() { + _young_list_min_length = 0; + + if (!adaptive_young_list_length()) + return; + + if (_alloc_rate_ms_seq->num() > 3) { + double now_sec = os::elapsedTime(); + double when_ms = _mmu_tracker->when_max_gc_sec(now_sec) * 1000.0; + double alloc_rate_ms = predict_alloc_rate_ms(); + int min_regions = (int) ceil(alloc_rate_ms * when_ms); + int current_region_num = (int) _g1->young_list_length(); + _young_list_min_length = min_regions + current_region_num; + } +} + +void G1CollectorPolicy::calculate_young_list_target_config() { + if (adaptive_young_list_length()) { + size_t rs_lengths = (size_t) get_new_prediction(_rs_lengths_seq); + calculate_young_list_target_config(rs_lengths); + } else { + if (full_young_gcs()) + _young_list_target_length = _young_list_fixed_length; + else + _young_list_target_length = _young_list_fixed_length / 2; + _young_list_target_length = MAX2(_young_list_target_length, (size_t)1); + size_t so_length = calculate_optimal_so_length(_young_list_target_length); + guarantee( so_length < _young_list_target_length, "invariant" ); + _young_list_so_prefix_length = so_length; + } +} + +// This method calculate the optimal scan-only set for a fixed young +// gen size. I couldn't work out how to reuse the more elaborate one, +// i.e. 
calculate_young_list_target_config(rs_length), as the loops are +// fundamentally different (the other one finds a config for different +// S-O lengths, whereas here we need to do the opposite). +size_t G1CollectorPolicy::calculate_optimal_so_length( + size_t young_list_length) { + if (!G1UseScanOnlyPrefix) + return 0; + + if (_all_pause_times_ms->num() < 3) { + // we won't use a scan-only set at the beginning to allow the rest + // of the predictors to warm up + return 0; + } + + if (_cost_per_scan_only_region_ms_seq->num() < 3) { + // then, we'll only set the S-O set to 1 for a little bit of time, + // to get enough information on the scanning cost + return 1; + } + + size_t pending_cards = (size_t) get_new_prediction(_pending_cards_seq); + size_t rs_lengths = (size_t) get_new_prediction(_rs_lengths_seq); + size_t adj_rs_lengths = rs_lengths + predict_rs_length_diff(); + size_t scanned_cards; + if (full_young_gcs()) + scanned_cards = predict_young_card_num(adj_rs_lengths); + else + scanned_cards = predict_non_young_card_num(adj_rs_lengths); + double base_time_ms = predict_base_elapsed_time_ms(pending_cards, + scanned_cards); + + size_t so_length = 0; + double max_gc_eff = 0.0; + for (size_t i = 0; i < young_list_length; ++i) { + double gc_eff = 0.0; + double pause_time_ms = 0.0; + predict_gc_eff(young_list_length, i, base_time_ms, + &gc_eff, &pause_time_ms); + if (gc_eff > max_gc_eff) { + max_gc_eff = gc_eff; + so_length = i; + } + } + + // set it to 95% of the optimal to make sure we sample the "area" + // around the optimal length to get up-to-date survival rate data + return so_length * 950 / 1000; +} + +// This is a really cool piece of code! It finds the best +// target configuration (young length / scan-only prefix length) so +// that GC efficiency is maximized and that we also meet a pause +// time. It's a triple nested loop. These loops are explained below +// from the inside-out :-) +// +// (a) The innermost loop will try to find the optimal young length +// for a fixed S-O length. It uses a binary search to speed up the +// process. We assume that, for a fixed S-O length, as we add more +// young regions to the CSet, the GC efficiency will only go up (I'll +// skip the proof). So, using a binary search to optimize this process +// makes perfect sense. +// +// (b) The middle loop will fix the S-O length before calling the +// innermost one. It will vary it between two parameters, increasing +// it by a given increment. +// +// (c) The outermost loop will call the middle loop three times. +// (1) The first time it will explore all possible S-O length values +// from 0 to as large as it can get, using a coarse increment (to +// quickly "home in" to where the optimal seems to be). +// (2) The second time it will explore the values around the optimal +// that was found by the first iteration using a fine increment. +// (3) Once the optimal config has been determined by the second +// iteration, we'll redo the calculation, but setting the S-O length +// to 95% of the optimal to make sure we sample the "area" +// around the optimal length to get up-to-date survival rate data +// +// Termination conditions for the iterations are several: the pause +// time is over the limit, we do not have enough to-space, etc. 
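The comment above rests on one monotonicity observation for the innermost step (a): for a fixed scan-only prefix, if a given young length already misses the pause target or the available to-space, every longer length does too, so the largest feasible length can be located by binary search. A standalone, hedged sketch of that search follows; the helper name and the fits() predicate are hypothetical and merely stand in for the predict_gc_eff() feasibility check used by the real code below.

// Illustrative sketch only: find the largest young length in [lo, hi] that
// still "fits" the pause target, assuming fits(lo) is true and fits() is
// monotone (true up to some length, false for everything longer).
static size_t max_feasible_young_length(size_t lo, size_t hi,
                                        bool (*fits)(size_t)) {
  while (lo < hi) {
    size_t mid = lo + (hi - lo + 1) / 2;  // round up so each step makes progress
    if (fits(mid)) {
      lo = mid;        // mid still meets the pause target: answer is >= mid
    } else {
      hi = mid - 1;    // mid is too expensive: answer is < mid
    }
  }
  return lo;           // largest young length that still meets the target
}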
+ +void G1CollectorPolicy::calculate_young_list_target_config(size_t rs_lengths) { + guarantee( adaptive_young_list_length(), "pre-condition" ); + + double start_time_sec = os::elapsedTime(); + size_t min_reserve_perc = MAX2((size_t)2, (size_t)G1MinReservePerc); + min_reserve_perc = MIN2((size_t) 50, min_reserve_perc); + size_t reserve_regions = + (size_t) ((double) min_reserve_perc * (double) _g1->n_regions() / 100.0); + + if (full_young_gcs() && _free_regions_at_end_of_collection > 0) { + // we are in fully-young mode and there are free regions in the heap + + size_t min_so_length = 0; + size_t max_so_length = 0; + + if (G1UseScanOnlyPrefix) { + if (_all_pause_times_ms->num() < 3) { + // we won't use a scan-only set at the beginning to allow the rest + // of the predictors to warm up + min_so_length = 0; + max_so_length = 0; + } else if (_cost_per_scan_only_region_ms_seq->num() < 3) { + // then, we'll only set the S-O set to 1 for a little bit of time, + // to get enough information on the scanning cost + min_so_length = 1; + max_so_length = 1; + } else if (_in_marking_window || _last_full_young_gc) { + // no S-O prefix during a marking phase either, as at the end + // of the marking phase we'll have to use a very small young + // length target to fill up the rest of the CSet with + // non-young regions and, if we have lots of scan-only regions + // left-over, we will not be able to add any more non-young + // regions. + min_so_length = 0; + max_so_length = 0; + } else { + // this is the common case; we'll never reach the maximum, we + // one of the end conditions will fire well before that + // (hopefully!) + min_so_length = 0; + max_so_length = _free_regions_at_end_of_collection - 1; + } + } else { + // no S-O prefix, as the switch is not set, but we still need to + // do one iteration to calculate the best young target that + // meets the pause time; this way we reuse the same code instead + // of replicating it + min_so_length = 0; + max_so_length = 0; + } + + double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0; + size_t pending_cards = (size_t) get_new_prediction(_pending_cards_seq); + size_t adj_rs_lengths = rs_lengths + predict_rs_length_diff(); + size_t scanned_cards; + if (full_young_gcs()) + scanned_cards = predict_young_card_num(adj_rs_lengths); + else + scanned_cards = predict_non_young_card_num(adj_rs_lengths); + // calculate this once, so that we don't have to recalculate it in + // the innermost loop + double base_time_ms = predict_base_elapsed_time_ms(pending_cards, + scanned_cards); + + // the result + size_t final_young_length = 0; + size_t final_so_length = 0; + double final_gc_eff = 0.0; + // we'll also keep track of how many times we go into the inner loop + // this is for profiling reasons + size_t calculations = 0; + + // this determines which of the three iterations the outer loop is in + typedef enum { + pass_type_coarse, + pass_type_fine, + pass_type_final + } pass_type_t; + + // range of the outer loop's iteration + size_t from_so_length = min_so_length; + size_t to_so_length = max_so_length; + guarantee( from_so_length <= to_so_length, "invariant" ); + + // this will keep the S-O length that's found by the second + // iteration of the outer loop; we'll keep it just in case the third + // iteration fails to find something + size_t fine_so_length = 0; + + // the increment step for the coarse (first) iteration + size_t so_coarse_increments = 5; + + // the common case, we'll start with the coarse iteration + pass_type_t pass = pass_type_coarse; + 
size_t so_length_incr = so_coarse_increments;
+
+    if (from_so_length == to_so_length) {
+      // no point in doing the coarse iteration, we'll go directly into
+      // the fine one (we're essentially trying to find the optimal young
+      // length for a fixed S-O length).
+      so_length_incr = 1;
+      pass = pass_type_final;
+    } else if (to_so_length - from_so_length < 3 * so_coarse_increments) {
+      // again, the range is too short so no point in doing the coarse
+      // iteration either
+      so_length_incr = 1;
+      pass = pass_type_fine;
+    }
+
+    bool done = false;
+    // this is the outermost loop
+    while (!done) {
+#if 0
+      // leave this in for debugging, just in case
+      gclog_or_tty->print_cr("searching between " SIZE_FORMAT " and " SIZE_FORMAT
+                             ", incr " SIZE_FORMAT ", pass %s",
+                             from_so_length, to_so_length, so_length_incr,
+                             (pass == pass_type_coarse) ? "coarse" :
+                             (pass == pass_type_fine) ? "fine" : "final");
+#endif // 0
+
+      size_t so_length = from_so_length;
+      size_t init_free_regions =
+        MAX2((size_t)0,
+             _free_regions_at_end_of_collection +
+             _scan_only_regions_at_end_of_collection - reserve_regions);
+
+      // this determines whether a configuration was found
+      bool gc_eff_set = false;
+      // this is the middle loop
+      while (so_length <= to_so_length) {
+        // base time, which excludes region-related time; again we
+        // calculate it once to avoid recalculating it in the
+        // innermost loop
+        double base_time_with_so_ms =
+          base_time_ms + predict_scan_only_time_ms(so_length);
+        // it's already over the pause target, go around
+        if (base_time_with_so_ms > target_pause_time_ms)
+          break;
+
+        size_t starting_young_length = so_length+1;
+
+        // we make sure that the shortest young length that makes sense
+        // (one more than the S-O length) is feasible
+        size_t min_young_length = starting_young_length;
+        double min_gc_eff;
+        bool min_ok;
+        ++calculations;
+        min_ok = predict_gc_eff(min_young_length, so_length,
+                                base_time_with_so_ms,
+                                init_free_regions, target_pause_time_ms,
+                                &min_gc_eff);
+
+        if (min_ok) {
+          // the shortest young length is indeed feasible; we'll now
+          // set up the max young length and we'll do a binary search
+          // between min_young_length and max_young_length
+          size_t max_young_length = _free_regions_at_end_of_collection - 1;
+          double max_gc_eff = 0.0;
+          bool max_ok = false;
+
+          // the innermost loop! (finally!)
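+          // To sketch how the search below behaves: min_young_length always
+          // holds a young length that is known to be feasible. Each pass
+          // probes max_young_length; if that is feasible too, the lower
+          // bound jumps up to it and the window shifts further up. Either
+          // way the gap between the two bounds is halved, so the loop stops
+          // once the bounds meet.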
+          while (max_young_length > min_young_length) {
+            // we'll make sure that min_young_length is always at a
+            // feasible config
+            guarantee( min_ok, "invariant" );
+
+            ++calculations;
+            max_ok = predict_gc_eff(max_young_length, so_length,
+                                    base_time_with_so_ms,
+                                    init_free_regions, target_pause_time_ms,
+                                    &max_gc_eff);
+
+            size_t diff = (max_young_length - min_young_length) / 2;
+            if (max_ok) {
+              min_young_length = max_young_length;
+              min_gc_eff = max_gc_eff;
+              min_ok = true;
+            }
+            max_young_length = min_young_length + diff;
+          }
+
+          // the innermost loop found a config
+          guarantee( min_ok, "invariant" );
+          if (min_gc_eff > final_gc_eff) {
+            // it's the best config so far, so we'll keep it
+            final_gc_eff = min_gc_eff;
+            final_young_length = min_young_length;
+            final_so_length = so_length;
+            gc_eff_set = true;
+          }
+        }
+
+        // increment the fixed S-O length and go around
+        so_length += so_length_incr;
+      }
+
+      // this is the end of the outermost loop and we need to decide
+      // what to do during the next iteration
+      if (pass == pass_type_coarse) {
+        // we just did the coarse pass (first iteration)
+
+        if (!gc_eff_set)
+          // we didn't find a feasible config so we'll just bail out; of
+          // course, it might be the case that we missed it; but I'd say
+          // it's a bit unlikely
+          done = true;
+        else {
+          // We did find a feasible config with optimal GC eff during
+          // the first pass. So in the second pass we'll only consider the
+          // S-O lengths around that config with a fine increment.
+
+          guarantee( so_length_incr == so_coarse_increments, "invariant" );
+          guarantee( final_so_length >= min_so_length, "invariant" );
+
+#if 0
+          // leave this in for debugging, just in case
+          gclog_or_tty->print_cr(" coarse pass: SO length " SIZE_FORMAT,
+                                 final_so_length);
+#endif // 0
+
+          from_so_length =
+            (final_so_length - min_so_length > so_coarse_increments) ?
+            final_so_length - so_coarse_increments + 1 : min_so_length;
+          to_so_length =
+            (max_so_length - final_so_length > so_coarse_increments) ?
+            final_so_length + so_coarse_increments - 1 : max_so_length;
+
+          pass = pass_type_fine;
+          so_length_incr = 1;
+        }
+      } else if (pass == pass_type_fine) {
+        // we just finished the second pass
+
+        if (!gc_eff_set) {
+          // we didn't find a feasible config (yes, it's possible;
+          // notice that, sometimes, we go directly into the fine
+          // iteration and skip the coarse one) so we bail out
+          done = true;
+        } else {
+          // We did find a feasible config with optimal GC eff
+          guarantee( so_length_incr == 1, "invariant" );
+
+          if (final_so_length == 0) {
+            // The config has an empty S-O set, so we'll just bail out
+            done = true;
+          } else {
+            // we'll go around once more, setting the S-O length to 95%
+            // of the optimal
+            size_t new_so_length = 950 * final_so_length / 1000;
+
+#if 0
+            // leave this in for debugging, just in case
+            gclog_or_tty->print_cr(" fine pass: SO length " SIZE_FORMAT
+                                   ", setting it to " SIZE_FORMAT,
+                                   final_so_length, new_so_length);
+#endif // 0
+
+            from_so_length = new_so_length;
+            to_so_length = new_so_length;
+            fine_so_length = final_so_length;
+
+            pass = pass_type_final;
+          }
+        }
+      } else if (pass == pass_type_final) {
+        // we just finished the final (third) pass
+
+        if (!gc_eff_set)
+          // we didn't find a feasible config, so we'll just use the one
+          // we found during the second pass, which we saved
+          final_so_length = fine_so_length;
+
+        // and we're done!
+        done = true;
+      } else {
+        guarantee( false, "should never reach here" );
+      }
+
+      // we now go around the outermost loop
+    }
+
+    // we should have at least one region in the target young length
+    _young_list_target_length = MAX2((size_t) 1, final_young_length);
+    if (final_so_length >= final_young_length)
+      // and we need to ensure that the S-O length is not greater than
+      // the target young length (this is being a bit careful)
+      final_so_length = 0;
+    _young_list_so_prefix_length = final_so_length;
+    guarantee( !_in_marking_window || !_last_full_young_gc ||
+               _young_list_so_prefix_length == 0, "invariant" );
+
+    // let's keep an eye on how long we spend on this calculation
+    // right now, I assume that we'll print it when we need it; we
+    // should really add it to the breakdown of a pause
+    double end_time_sec = os::elapsedTime();
+    double elapsed_time_ms = (end_time_sec - start_time_sec) * 1000.0;
+
+#if 0
+    // leave this in for debugging, just in case
+    gclog_or_tty->print_cr("target = %1.1lf ms, young = " SIZE_FORMAT
+                           ", SO = " SIZE_FORMAT ", "
+                           "elapsed %1.2lf ms, calcs: " SIZE_FORMAT " (%s%s) "
+                           SIZE_FORMAT SIZE_FORMAT,
+                           target_pause_time_ms,
+                           _young_list_target_length - _young_list_so_prefix_length,
+                           _young_list_so_prefix_length,
+                           elapsed_time_ms,
+                           calculations,
+                           full_young_gcs() ? "full" : "partial",
+                           should_initiate_conc_mark() ? " i-m" : "",
+                           in_marking_window(),
+                           in_marking_window_im());
+#endif // 0
+
+    if (_young_list_target_length < _young_list_min_length) {
+      // bummer; this means that, if we do a pause when the optimal
+      // config dictates, we'll violate the pause spacing target (the
+      // min length was calculated based on the application's current
+      // alloc rate);
+
+      // so, we have to bite the bullet, and allocate the minimum
+      // number. We'll violate our target, but we just can't meet it.
+
+      size_t so_length = 0;
+      // a note further up explains why we do not want an S-O length
+      // during marking
+      if (!_in_marking_window && !_last_full_young_gc)
+        // but we can still try to see whether we can find an optimal
+        // S-O length
+        so_length = calculate_optimal_so_length(_young_list_min_length);
+
+#if 0
+      // leave this in for debugging, just in case
+      gclog_or_tty->print_cr("adjusted target length from "
+                             SIZE_FORMAT " to " SIZE_FORMAT
+                             ", SO " SIZE_FORMAT,
+                             _young_list_target_length, _young_list_min_length,
+                             so_length);
+#endif // 0
+
+      _young_list_target_length =
+        MAX2(_young_list_min_length, (size_t)1);
+      _young_list_so_prefix_length = so_length;
+    }
+  } else {
+    // we are in a partially-young mode or we've run out of regions (due
+    // to evacuation failure)
+
+#if 0
+    // leave this in for debugging, just in case
+    gclog_or_tty->print_cr("(partial) setting target to " SIZE_FORMAT
+                           ", SO " SIZE_FORMAT,
+                           _young_list_min_length, 0);
+#endif // 0
+
+    // we'll do the pause as soon as possible and with no S-O prefix
+    // (see above for the reasons behind the latter)
+    _young_list_target_length =
+      MAX2(_young_list_min_length, (size_t) 1);
+    _young_list_so_prefix_length = 0;
+  }
+
+  _rs_lengths_prediction = rs_lengths;
+}
+
+// This is used by: calculate_optimal_so_length(length).
It returns +// the GC eff and predicted pause time for a particular config +void +G1CollectorPolicy::predict_gc_eff(size_t young_length, + size_t so_length, + double base_time_ms, + double* ret_gc_eff, + double* ret_pause_time_ms) { + double so_time_ms = predict_scan_only_time_ms(so_length); + double accum_surv_rate_adj = 0.0; + if (so_length > 0) + accum_surv_rate_adj = accum_yg_surv_rate_pred((int)(so_length - 1)); + double accum_surv_rate = + accum_yg_surv_rate_pred((int)(young_length - 1)) - accum_surv_rate_adj; + size_t bytes_to_copy = + (size_t) (accum_surv_rate * (double) HeapRegion::GrainBytes); + double copy_time_ms = predict_object_copy_time_ms(bytes_to_copy); + double young_other_time_ms = + predict_young_other_time_ms(young_length - so_length); + double pause_time_ms = + base_time_ms + so_time_ms + copy_time_ms + young_other_time_ms; + size_t reclaimed_bytes = + (young_length - so_length) * HeapRegion::GrainBytes - bytes_to_copy; + double gc_eff = (double) reclaimed_bytes / pause_time_ms; + + *ret_gc_eff = gc_eff; + *ret_pause_time_ms = pause_time_ms; +} + +// This is used by: calculate_young_list_target_config(rs_length). It +// returns the GC eff of a particular config. It returns false if that +// config violates any of the end conditions of the search in the +// calling method, or true upon success. The end conditions were put +// here since it's called twice and it was best not to replicate them +// in the caller. Also, passing the parameteres avoids having to +// recalculate them in the innermost loop. +bool +G1CollectorPolicy::predict_gc_eff(size_t young_length, + size_t so_length, + double base_time_with_so_ms, + size_t init_free_regions, + double target_pause_time_ms, + double* ret_gc_eff) { + *ret_gc_eff = 0.0; + + if (young_length >= init_free_regions) + // end condition 1: not enough space for the young regions + return false; + + double accum_surv_rate_adj = 0.0; + if (so_length > 0) + accum_surv_rate_adj = accum_yg_surv_rate_pred((int)(so_length - 1)); + double accum_surv_rate = + accum_yg_surv_rate_pred((int)(young_length - 1)) - accum_surv_rate_adj; + size_t bytes_to_copy = + (size_t) (accum_surv_rate * (double) HeapRegion::GrainBytes); + double copy_time_ms = predict_object_copy_time_ms(bytes_to_copy); + double young_other_time_ms = + predict_young_other_time_ms(young_length - so_length); + double pause_time_ms = + base_time_with_so_ms + copy_time_ms + young_other_time_ms; + + if (pause_time_ms > target_pause_time_ms) + // end condition 2: over the target pause time + return false; + + size_t reclaimed_bytes = + (young_length - so_length) * HeapRegion::GrainBytes - bytes_to_copy; + size_t free_bytes = + (init_free_regions - young_length) * HeapRegion::GrainBytes; + + if ((2.0 + sigma()) * (double) bytes_to_copy > (double) free_bytes) + // end condition 3: out of to-space (conservatively) + return false; + + // success! 
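+  // (GC efficiency here means bytes reclaimed per millisecond of predicted
+  // pause time; with illustrative numbers, reclaiming 8M bytes in a 40 ms
+  // predicted pause scores roughly 200K bytes/ms.)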
+ double gc_eff = (double) reclaimed_bytes / pause_time_ms; + *ret_gc_eff = gc_eff; + + return true; +} + +void G1CollectorPolicy::check_prediction_validity() { + guarantee( adaptive_young_list_length(), "should not call this otherwise" ); + + size_t rs_lengths = _g1->young_list_sampled_rs_lengths(); + if (rs_lengths > _rs_lengths_prediction) { + // add 10% to avoid having to recalculate often + size_t rs_lengths_prediction = rs_lengths * 1100 / 1000; + calculate_young_list_target_config(rs_lengths_prediction); + } +} + +HeapWord* G1CollectorPolicy::mem_allocate_work(size_t size, + bool is_tlab, + bool* gc_overhead_limit_was_exceeded) { + guarantee(false, "Not using this policy feature yet."); + return NULL; +} + +// This method controls how a collector handles one or more +// of its generations being fully allocated. +HeapWord* G1CollectorPolicy::satisfy_failed_allocation(size_t size, + bool is_tlab) { + guarantee(false, "Not using this policy feature yet."); + return NULL; +} + + +#ifndef PRODUCT +bool G1CollectorPolicy::verify_young_ages() { + HeapRegion* head = _g1->young_list_first_region(); + return + verify_young_ages(head, _short_lived_surv_rate_group); + // also call verify_young_ages on any additional surv rate groups +} + +bool +G1CollectorPolicy::verify_young_ages(HeapRegion* head, + SurvRateGroup *surv_rate_group) { + guarantee( surv_rate_group != NULL, "pre-condition" ); + + const char* name = surv_rate_group->name(); + bool ret = true; + int prev_age = -1; + + for (HeapRegion* curr = head; + curr != NULL; + curr = curr->get_next_young_region()) { + SurvRateGroup* group = curr->surv_rate_group(); + if (group == NULL && !curr->is_survivor()) { + gclog_or_tty->print_cr("## %s: encountered NULL surv_rate_group", name); + ret = false; + } + + if (surv_rate_group == group) { + int age = curr->age_in_surv_rate_group(); + + if (age < 0) { + gclog_or_tty->print_cr("## %s: encountered negative age", name); + ret = false; + } + + if (age <= prev_age) { + gclog_or_tty->print_cr("## %s: region ages are not strictly increasing " + "(%d, %d)", name, age, prev_age); + ret = false; + } + prev_age = age; + } + } + + return ret; +} +#endif // PRODUCT + +void G1CollectorPolicy::record_full_collection_start() { + _cur_collection_start_sec = os::elapsedTime(); + // Release the future to-space so that it is available for compaction into. + _g1->set_full_collection(); +} + +void G1CollectorPolicy::record_full_collection_end() { + // Consider this like a collection pause for the purposes of allocation + // since last pause. + double end_sec = os::elapsedTime(); + double full_gc_time_sec = end_sec - _cur_collection_start_sec; + double full_gc_time_ms = full_gc_time_sec * 1000.0; + + checkpoint_conc_overhead(); + + _all_full_gc_times_ms->add(full_gc_time_ms); + + update_recent_gc_times(end_sec, full_gc_time_sec); + + _g1->clear_full_collection(); + + // "Nuke" the heuristics that control the fully/partially young GC + // transitions and make sure we start with fully young GCs after the + // Full GC. 
+ set_full_young_gcs(true); + _last_full_young_gc = false; + _should_revert_to_full_young_gcs = false; + _should_initiate_conc_mark = false; + _known_garbage_bytes = 0; + _known_garbage_ratio = 0.0; + _in_marking_window = false; + _in_marking_window_im = false; + + _short_lived_surv_rate_group->record_scan_only_prefix(0); + _short_lived_surv_rate_group->start_adding_regions(); + // also call this on any additional surv rate groups + + _prev_region_num_young = _region_num_young; + _prev_region_num_tenured = _region_num_tenured; + + _free_regions_at_end_of_collection = _g1->free_regions(); + _scan_only_regions_at_end_of_collection = 0; + calculate_young_list_min_length(); + calculate_young_list_target_config(); + } + +void G1CollectorPolicy::record_pop_compute_rc_start() { + _pop_compute_rc_start = os::elapsedTime(); +} +void G1CollectorPolicy::record_pop_compute_rc_end() { + double ms = (os::elapsedTime() - _pop_compute_rc_start)*1000.0; + _cur_popular_compute_rc_time_ms = ms; + _pop_compute_rc_start = 0.0; +} +void G1CollectorPolicy::record_pop_evac_start() { + _pop_evac_start = os::elapsedTime(); +} +void G1CollectorPolicy::record_pop_evac_end() { + double ms = (os::elapsedTime() - _pop_evac_start)*1000.0; + _cur_popular_evac_time_ms = ms; + _pop_evac_start = 0.0; +} + +void G1CollectorPolicy::record_before_bytes(size_t bytes) { + _bytes_in_to_space_before_gc += bytes; +} + +void G1CollectorPolicy::record_after_bytes(size_t bytes) { + _bytes_in_to_space_after_gc += bytes; +} + +void G1CollectorPolicy::record_stop_world_start() { + _stop_world_start = os::elapsedTime(); +} + +void G1CollectorPolicy::record_collection_pause_start(double start_time_sec, + size_t start_used) { + if (PrintGCDetails) { + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print("[GC pause"); + if (in_young_gc_mode()) + gclog_or_tty->print(" (%s)", full_young_gcs() ? 
"young" : "partial"); + } + + assert(_g1->used_regions() == _g1->recalculate_used_regions(), + "sanity"); + + double s_w_t_ms = (start_time_sec - _stop_world_start) * 1000.0; + _all_stop_world_times_ms->add(s_w_t_ms); + _stop_world_start = 0.0; + + _cur_collection_start_sec = start_time_sec; + _cur_collection_pause_used_at_start_bytes = start_used; + _cur_collection_pause_used_regions_at_start = _g1->used_regions(); + _pending_cards = _g1->pending_card_num(); + _max_pending_cards = _g1->max_pending_card_num(); + + _bytes_in_to_space_before_gc = 0; + _bytes_in_to_space_after_gc = 0; + _bytes_in_collection_set_before_gc = 0; + +#ifdef DEBUG + // initialise these to something well known so that we can spot + // if they are not set properly + + for (int i = 0; i < _parallel_gc_threads; ++i) { + _par_last_ext_root_scan_times_ms[i] = -666.0; + _par_last_mark_stack_scan_times_ms[i] = -666.0; + _par_last_scan_only_times_ms[i] = -666.0; + _par_last_scan_only_regions_scanned[i] = -666.0; + _par_last_update_rs_start_times_ms[i] = -666.0; + _par_last_update_rs_times_ms[i] = -666.0; + _par_last_update_rs_processed_buffers[i] = -666.0; + _par_last_scan_rs_start_times_ms[i] = -666.0; + _par_last_scan_rs_times_ms[i] = -666.0; + _par_last_scan_new_refs_times_ms[i] = -666.0; + _par_last_obj_copy_times_ms[i] = -666.0; + _par_last_termination_times_ms[i] = -666.0; + + _pop_par_last_update_rs_start_times_ms[i] = -666.0; + _pop_par_last_update_rs_times_ms[i] = -666.0; + _pop_par_last_update_rs_processed_buffers[i] = -666.0; + _pop_par_last_scan_rs_start_times_ms[i] = -666.0; + _pop_par_last_scan_rs_times_ms[i] = -666.0; + _pop_par_last_closure_app_times_ms[i] = -666.0; + } +#endif + + for (int i = 0; i < _aux_num; ++i) { + _cur_aux_times_ms[i] = 0.0; + _cur_aux_times_set[i] = false; + } + + _satb_drain_time_set = false; + _last_satb_drain_processed_buffers = -1; + + if (in_young_gc_mode()) + _last_young_gc_full = false; + + + // do that for any other surv rate groups + _short_lived_surv_rate_group->stop_adding_regions(); + size_t short_lived_so_length = _young_list_so_prefix_length; + _short_lived_surv_rate_group->record_scan_only_prefix(short_lived_so_length); + tag_scan_only(short_lived_so_length); + + assert( verify_young_ages(), "region age verification" ); +} + +void G1CollectorPolicy::tag_scan_only(size_t short_lived_scan_only_length) { + // done in a way that it can be extended for other surv rate groups too... 
+ + HeapRegion* head = _g1->young_list_first_region(); + bool finished_short_lived = (short_lived_scan_only_length == 0); + + if (finished_short_lived) + return; + + for (HeapRegion* curr = head; + curr != NULL; + curr = curr->get_next_young_region()) { + SurvRateGroup* surv_rate_group = curr->surv_rate_group(); + int age = curr->age_in_surv_rate_group(); + + if (surv_rate_group == _short_lived_surv_rate_group) { + if ((size_t)age < short_lived_scan_only_length) + curr->set_scan_only(); + else + finished_short_lived = true; + } + + + if (finished_short_lived) + return; + } + + guarantee( false, "we should never reach here" ); +} + +void G1CollectorPolicy::record_popular_pause_preamble_start() { + _cur_popular_preamble_start_ms = os::elapsedTime() * 1000.0; +} + +void G1CollectorPolicy::record_popular_pause_preamble_end() { + _cur_popular_preamble_time_ms = + (os::elapsedTime() * 1000.0) - _cur_popular_preamble_start_ms; + + // copy the recorded statistics of the first pass to temporary arrays + for (int i = 0; i < _parallel_gc_threads; ++i) { + _pop_par_last_update_rs_start_times_ms[i] = _par_last_update_rs_start_times_ms[i]; + _pop_par_last_update_rs_times_ms[i] = _par_last_update_rs_times_ms[i]; + _pop_par_last_update_rs_processed_buffers[i] = _par_last_update_rs_processed_buffers[i]; + _pop_par_last_scan_rs_start_times_ms[i] = _par_last_scan_rs_start_times_ms[i]; + _pop_par_last_scan_rs_times_ms[i] = _par_last_scan_rs_times_ms[i]; + _pop_par_last_closure_app_times_ms[i] = _par_last_obj_copy_times_ms[i]; + } +} + +void G1CollectorPolicy::record_mark_closure_time(double mark_closure_time_ms) { + _mark_closure_time_ms = mark_closure_time_ms; +} + +void G1CollectorPolicy::record_concurrent_mark_init_start() { + _mark_init_start_sec = os::elapsedTime(); + guarantee(!in_young_gc_mode(), "should not do be here in young GC mode"); +} + +void G1CollectorPolicy::record_concurrent_mark_init_end_pre(double + mark_init_elapsed_time_ms) { + _during_marking = true; + _should_initiate_conc_mark = false; + _cur_mark_stop_world_time_ms = mark_init_elapsed_time_ms; +} + +void G1CollectorPolicy::record_concurrent_mark_init_end() { + double end_time_sec = os::elapsedTime(); + double elapsed_time_ms = (end_time_sec - _mark_init_start_sec) * 1000.0; + _concurrent_mark_init_times_ms->add(elapsed_time_ms); + checkpoint_conc_overhead(); + record_concurrent_mark_init_end_pre(elapsed_time_ms); + + _mmu_tracker->add_pause(_mark_init_start_sec, end_time_sec, true); +} + +void G1CollectorPolicy::record_concurrent_mark_remark_start() { + _mark_remark_start_sec = os::elapsedTime(); + _during_marking = false; +} + +void G1CollectorPolicy::record_concurrent_mark_remark_end() { + double end_time_sec = os::elapsedTime(); + double elapsed_time_ms = (end_time_sec - _mark_remark_start_sec)*1000.0; + checkpoint_conc_overhead(); + _concurrent_mark_remark_times_ms->add(elapsed_time_ms); + _cur_mark_stop_world_time_ms += elapsed_time_ms; + _prev_collection_pause_end_ms += elapsed_time_ms; + + _mmu_tracker->add_pause(_mark_remark_start_sec, end_time_sec, true); +} + +void G1CollectorPolicy::record_concurrent_mark_cleanup_start() { + _mark_cleanup_start_sec = os::elapsedTime(); +} + +void +G1CollectorPolicy::record_concurrent_mark_cleanup_end(size_t freed_bytes, + size_t max_live_bytes) { + record_concurrent_mark_cleanup_end_work1(freed_bytes, max_live_bytes); + record_concurrent_mark_cleanup_end_work2(); +} + +void +G1CollectorPolicy:: +record_concurrent_mark_cleanup_end_work1(size_t freed_bytes, + size_t max_live_bytes) { + if 
(_n_marks < 2) _n_marks++; + if (G1PolicyVerbose > 0) + gclog_or_tty->print_cr("At end of marking, max_live is " SIZE_FORMAT " MB " + " (of " SIZE_FORMAT " MB heap).", + max_live_bytes/M, _g1->capacity()/M); +} + +// The important thing about this is that it includes "os::elapsedTime". +void G1CollectorPolicy::record_concurrent_mark_cleanup_end_work2() { + checkpoint_conc_overhead(); + double end_time_sec = os::elapsedTime(); + double elapsed_time_ms = (end_time_sec - _mark_cleanup_start_sec)*1000.0; + _concurrent_mark_cleanup_times_ms->add(elapsed_time_ms); + _cur_mark_stop_world_time_ms += elapsed_time_ms; + _prev_collection_pause_end_ms += elapsed_time_ms; + + _mmu_tracker->add_pause(_mark_cleanup_start_sec, end_time_sec, true); + + _num_markings++; + + // We did a marking, so reset the "since_last_mark" variables. + double considerConcMarkCost = 1.0; + // If there are available processors, concurrent activity is free... + if (Threads::number_of_non_daemon_threads() * 2 < + os::active_processor_count()) { + considerConcMarkCost = 0.0; + } + _n_pauses_at_mark_end = _n_pauses; + _n_marks_since_last_pause++; + _conc_mark_initiated = false; +} + +void +G1CollectorPolicy::record_concurrent_mark_cleanup_completed() { + if (in_young_gc_mode()) { + _should_revert_to_full_young_gcs = false; + _last_full_young_gc = true; + _in_marking_window = false; + if (adaptive_young_list_length()) + calculate_young_list_target_config(); + } +} + +void G1CollectorPolicy::record_concurrent_pause() { + if (_stop_world_start > 0.0) { + double yield_ms = (os::elapsedTime() - _stop_world_start) * 1000.0; + _all_yield_times_ms->add(yield_ms); + } +} + +void G1CollectorPolicy::record_concurrent_pause_end() { +} + +void G1CollectorPolicy::record_collection_pause_end_CH_strong_roots() { + _cur_CH_strong_roots_end_sec = os::elapsedTime(); + _cur_CH_strong_roots_dur_ms = + (_cur_CH_strong_roots_end_sec - _cur_collection_start_sec) * 1000.0; +} + +void G1CollectorPolicy::record_collection_pause_end_G1_strong_roots() { + _cur_G1_strong_roots_end_sec = os::elapsedTime(); + _cur_G1_strong_roots_dur_ms = + (_cur_G1_strong_roots_end_sec - _cur_CH_strong_roots_end_sec) * 1000.0; +} + +template +T sum_of(T* sum_arr, int start, int n, int N) { + T sum = (T)0; + for (int i = 0; i < n; i++) { + int j = (start + i) % N; + sum += sum_arr[j]; + } + return sum; +} + +void G1CollectorPolicy::print_par_stats (int level, + const char* str, + double* data, + bool summary) { + double min = data[0], max = data[0]; + double total = 0.0; + int j; + for (j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print("[%s (ms):", str); + for (uint i = 0; i < ParallelGCThreads; ++i) { + double val = data[i]; + if (val < min) + min = val; + if (val > max) + max = val; + total += val; + gclog_or_tty->print(" %3.1lf", val); + } + if (summary) { + gclog_or_tty->print_cr(""); + double avg = total / (double) ParallelGCThreads; + gclog_or_tty->print(" "); + for (j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print("Avg: %5.1lf, Min: %5.1lf, Max: %5.1lf", + avg, min, max); + } + gclog_or_tty->print_cr("]"); +} + +void G1CollectorPolicy::print_par_buffers (int level, + const char* str, + double* data, + bool summary) { + double min = data[0], max = data[0]; + double total = 0.0; + int j; + for (j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print("[%s :", str); + for (uint i = 0; i < ParallelGCThreads; ++i) { + double val = data[i]; + if (val < min) + min = val; + if (val > max) + max = val; + total += 
val; + gclog_or_tty->print(" %d", (int) val); + } + if (summary) { + gclog_or_tty->print_cr(""); + double avg = total / (double) ParallelGCThreads; + gclog_or_tty->print(" "); + for (j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print("Sum: %d, Avg: %d, Min: %d, Max: %d", + (int)total, (int)avg, (int)min, (int)max); + } + gclog_or_tty->print_cr("]"); +} + +void G1CollectorPolicy::print_stats (int level, + const char* str, + double value) { + for (int j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print_cr("[%s: %5.1lf ms]", str, value); +} + +void G1CollectorPolicy::print_stats (int level, + const char* str, + int value) { + for (int j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print_cr("[%s: %d]", str, value); +} + +double G1CollectorPolicy::avg_value (double* data) { + if (ParallelGCThreads > 0) { + double ret = 0.0; + for (uint i = 0; i < ParallelGCThreads; ++i) + ret += data[i]; + return ret / (double) ParallelGCThreads; + } else { + return data[0]; + } +} + +double G1CollectorPolicy::max_value (double* data) { + if (ParallelGCThreads > 0) { + double ret = data[0]; + for (uint i = 1; i < ParallelGCThreads; ++i) + if (data[i] > ret) + ret = data[i]; + return ret; + } else { + return data[0]; + } +} + +double G1CollectorPolicy::sum_of_values (double* data) { + if (ParallelGCThreads > 0) { + double sum = 0.0; + for (uint i = 0; i < ParallelGCThreads; i++) + sum += data[i]; + return sum; + } else { + return data[0]; + } +} + +double G1CollectorPolicy::max_sum (double* data1, + double* data2) { + double ret = data1[0] + data2[0]; + + if (ParallelGCThreads > 0) { + for (uint i = 1; i < ParallelGCThreads; ++i) { + double data = data1[i] + data2[i]; + if (data > ret) + ret = data; + } + } + return ret; +} + +// Anything below that is considered to be zero +#define MIN_TIMER_GRANULARITY 0.0000001 + +void G1CollectorPolicy::record_collection_pause_end(bool popular, + bool abandoned) { + double end_time_sec = os::elapsedTime(); + double elapsed_ms = _last_pause_time_ms; + bool parallel = ParallelGCThreads > 0; + double evac_ms = (end_time_sec - _cur_G1_strong_roots_end_sec) * 1000.0; + size_t rs_size = + _cur_collection_pause_used_regions_at_start - collection_set_size(); + size_t cur_used_bytes = _g1->used(); + assert(cur_used_bytes == _g1->recalculate_used(), "It should!"); + bool last_pause_included_initial_mark = false; + +#ifndef PRODUCT + if (G1YoungSurvRateVerbose) { + gclog_or_tty->print_cr(""); + _short_lived_surv_rate_group->print(); + // do that for any other surv rate groups too + } +#endif // PRODUCT + + checkpoint_conc_overhead(); + + if (in_young_gc_mode()) { + last_pause_included_initial_mark = _should_initiate_conc_mark; + if (last_pause_included_initial_mark) + record_concurrent_mark_init_end_pre(0.0); + + size_t min_used_targ = + (_g1->capacity() / 100) * (G1SteadyStateUsed - G1SteadyStateUsedDelta); + + if (cur_used_bytes > min_used_targ) { + if (cur_used_bytes <= _prev_collection_pause_used_at_end_bytes) { + } else if (!_g1->mark_in_progress() && !_last_full_young_gc) { + _should_initiate_conc_mark = true; + } + } + + _prev_collection_pause_used_at_end_bytes = cur_used_bytes; + } + + _mmu_tracker->add_pause(end_time_sec - elapsed_ms/1000.0, + end_time_sec, false); + + guarantee(_cur_collection_pause_used_regions_at_start >= + collection_set_size(), + "Negative RS size?"); + + // This assert is exempted when we're doing parallel collection pauses, + // because the fragmentation caused by the parallel GC allocation 
buffers + // can lead to more memory being used during collection than was used + // before. Best leave this out until the fragmentation problem is fixed. + // Pauses in which evacuation failed can also lead to negative + // collections, since no space is reclaimed from a region containing an + // object whose evacuation failed. + // Further, we're now always doing parallel collection. But I'm still + // leaving this here as a placeholder for a more precise assertion later. + // (DLD, 10/05.) + assert((true || parallel) // Always using GC LABs now. + || _g1->evacuation_failed() + || _cur_collection_pause_used_at_start_bytes >= cur_used_bytes, + "Negative collection"); + + size_t freed_bytes = + _cur_collection_pause_used_at_start_bytes - cur_used_bytes; + size_t surviving_bytes = _collection_set_bytes_used_before - freed_bytes; + double survival_fraction = + (double)surviving_bytes/ + (double)_collection_set_bytes_used_before; + + _n_pauses++; + + if (!abandoned) { + _recent_CH_strong_roots_times_ms->add(_cur_CH_strong_roots_dur_ms); + _recent_G1_strong_roots_times_ms->add(_cur_G1_strong_roots_dur_ms); + _recent_evac_times_ms->add(evac_ms); + _recent_pause_times_ms->add(elapsed_ms); + + _recent_rs_sizes->add(rs_size); + + // We exempt parallel collection from this check because Alloc Buffer + // fragmentation can produce negative collections. Same with evac + // failure. + // Further, we're now always doing parallel collection. But I'm still + // leaving this here as a placeholder for a more precise assertion later. + // (DLD, 10/05. + assert((true || parallel) + || _g1->evacuation_failed() + || surviving_bytes <= _collection_set_bytes_used_before, + "Or else negative collection!"); + _recent_CS_bytes_used_before->add(_collection_set_bytes_used_before); + _recent_CS_bytes_surviving->add(surviving_bytes); + + // this is where we update the allocation rate of the application + double app_time_ms = + (_cur_collection_start_sec * 1000.0 - _prev_collection_pause_end_ms); + if (app_time_ms < MIN_TIMER_GRANULARITY) { + // This usually happens due to the timer not having the required + // granularity. Some Linuxes are the usual culprits. + // We'll just set it to something (arbitrarily) small. 
+ app_time_ms = 1.0; + } + size_t regions_allocated = + (_region_num_young - _prev_region_num_young) + + (_region_num_tenured - _prev_region_num_tenured); + double alloc_rate_ms = (double) regions_allocated / app_time_ms; + _alloc_rate_ms_seq->add(alloc_rate_ms); + _prev_region_num_young = _region_num_young; + _prev_region_num_tenured = _region_num_tenured; + + double interval_ms = + (end_time_sec - _recent_prev_end_times_for_all_gcs_sec->oldest()) * 1000.0; + update_recent_gc_times(end_time_sec, elapsed_ms); + _recent_avg_pause_time_ratio = _recent_gc_times_ms->sum()/interval_ms; + assert(recent_avg_pause_time_ratio() < 1.00, "All GC?"); + } + + if (G1PolicyVerbose > 1) { + gclog_or_tty->print_cr(" Recording collection pause(%d)", _n_pauses); + } + + PauseSummary* summary; + if (!abandoned && !popular) + summary = _non_pop_summary; + else if (!abandoned && popular) + summary = _pop_summary; + else if (abandoned && !popular) + summary = _non_pop_abandoned_summary; + else if (abandoned && popular) + summary = _pop_abandoned_summary; + else + guarantee(false, "should not get here!"); + + double pop_update_rs_time; + double pop_update_rs_processed_buffers; + double pop_scan_rs_time; + double pop_closure_app_time; + double pop_other_time; + + if (popular) { + PopPreambleSummary* preamble_summary = summary->pop_preamble_summary(); + guarantee(preamble_summary != NULL, "should not be null!"); + + pop_update_rs_time = avg_value(_pop_par_last_update_rs_times_ms); + pop_update_rs_processed_buffers = + sum_of_values(_pop_par_last_update_rs_processed_buffers); + pop_scan_rs_time = avg_value(_pop_par_last_scan_rs_times_ms); + pop_closure_app_time = avg_value(_pop_par_last_closure_app_times_ms); + pop_other_time = _cur_popular_preamble_time_ms - + (pop_update_rs_time + pop_scan_rs_time + pop_closure_app_time + + _cur_popular_evac_time_ms); + + preamble_summary->record_pop_preamble_time_ms(_cur_popular_preamble_time_ms); + preamble_summary->record_pop_update_rs_time_ms(pop_update_rs_time); + preamble_summary->record_pop_scan_rs_time_ms(pop_scan_rs_time); + preamble_summary->record_pop_closure_app_time_ms(pop_closure_app_time); + preamble_summary->record_pop_evacuation_time_ms(_cur_popular_evac_time_ms); + preamble_summary->record_pop_other_time_ms(pop_other_time); + } + + double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms); + double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms); + double scan_only_time = avg_value(_par_last_scan_only_times_ms); + double scan_only_regions_scanned = + sum_of_values(_par_last_scan_only_regions_scanned); + double update_rs_time = avg_value(_par_last_update_rs_times_ms); + double update_rs_processed_buffers = + sum_of_values(_par_last_update_rs_processed_buffers); + double scan_rs_time = avg_value(_par_last_scan_rs_times_ms); + double obj_copy_time = avg_value(_par_last_obj_copy_times_ms); + double termination_time = avg_value(_par_last_termination_times_ms); + + double parallel_other_time; + if (!abandoned) { + MainBodySummary* body_summary = summary->main_body_summary(); + guarantee(body_summary != NULL, "should not be null!"); + + if (_satb_drain_time_set) + body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms); + else + body_summary->record_satb_drain_time_ms(0.0); + body_summary->record_ext_root_scan_time_ms(ext_root_scan_time); + body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time); + body_summary->record_scan_only_time_ms(scan_only_time); + body_summary->record_update_rs_time_ms(update_rs_time); + 
body_summary->record_scan_rs_time_ms(scan_rs_time); + body_summary->record_obj_copy_time_ms(obj_copy_time); + if (parallel) { + body_summary->record_parallel_time_ms(_cur_collection_par_time_ms); + body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms); + body_summary->record_termination_time_ms(termination_time); + parallel_other_time = _cur_collection_par_time_ms - + (update_rs_time + ext_root_scan_time + mark_stack_scan_time + + scan_only_time + scan_rs_time + obj_copy_time + termination_time); + body_summary->record_parallel_other_time_ms(parallel_other_time); + } + body_summary->record_mark_closure_time_ms(_mark_closure_time_ms); + } + + if (G1PolicyVerbose > 1) { + gclog_or_tty->print_cr(" ET: %10.6f ms (avg: %10.6f ms)\n" + " CH Strong: %10.6f ms (avg: %10.6f ms)\n" + " G1 Strong: %10.6f ms (avg: %10.6f ms)\n" + " Evac: %10.6f ms (avg: %10.6f ms)\n" + " ET-RS: %10.6f ms (avg: %10.6f ms)\n" + " |RS|: " SIZE_FORMAT, + elapsed_ms, recent_avg_time_for_pauses_ms(), + _cur_CH_strong_roots_dur_ms, recent_avg_time_for_CH_strong_ms(), + _cur_G1_strong_roots_dur_ms, recent_avg_time_for_G1_strong_ms(), + evac_ms, recent_avg_time_for_evac_ms(), + scan_rs_time, + recent_avg_time_for_pauses_ms() - + recent_avg_time_for_G1_strong_ms(), + rs_size); + + gclog_or_tty->print_cr(" Used at start: " SIZE_FORMAT"K" + " At end " SIZE_FORMAT "K\n" + " garbage : " SIZE_FORMAT "K" + " of " SIZE_FORMAT "K\n" + " survival : %6.2f%% (%6.2f%% avg)", + _cur_collection_pause_used_at_start_bytes/K, + _g1->used()/K, freed_bytes/K, + _collection_set_bytes_used_before/K, + survival_fraction*100.0, + recent_avg_survival_fraction()*100.0); + gclog_or_tty->print_cr(" Recent %% gc pause time: %6.2f", + recent_avg_pause_time_ratio() * 100.0); + } + + double other_time_ms = elapsed_ms; + if (popular) + other_time_ms -= _cur_popular_preamble_time_ms; + + if (!abandoned) { + if (_satb_drain_time_set) + other_time_ms -= _cur_satb_drain_time_ms; + + if (parallel) + other_time_ms -= _cur_collection_par_time_ms + _cur_clear_ct_time_ms; + else + other_time_ms -= + update_rs_time + + ext_root_scan_time + mark_stack_scan_time + scan_only_time + + scan_rs_time + obj_copy_time; + } + + if (PrintGCDetails) { + gclog_or_tty->print_cr("%s%s, %1.8lf secs]", + (popular && !abandoned) ? " (popular)" : + (!popular && abandoned) ? " (abandoned)" : + (popular && abandoned) ? " (popular/abandoned)" : "", + (last_pause_included_initial_mark) ? 
" (initial-mark)" : "", + elapsed_ms / 1000.0); + + if (!abandoned) { + if (_satb_drain_time_set) + print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms); + if (_last_satb_drain_processed_buffers >= 0) + print_stats(2, "Processed Buffers", _last_satb_drain_processed_buffers); + } + if (popular) + print_stats(1, "Popularity Preamble", _cur_popular_preamble_time_ms); + if (parallel) { + if (popular) { + print_par_stats(2, "Update RS (Start)", _pop_par_last_update_rs_start_times_ms, false); + print_par_stats(2, "Update RS", _pop_par_last_update_rs_times_ms); + if (G1RSBarrierUseQueue) + print_par_buffers(3, "Processed Buffers", + _pop_par_last_update_rs_processed_buffers, true); + print_par_stats(2, "Scan RS", _pop_par_last_scan_rs_times_ms); + print_par_stats(2, "Closure app", _pop_par_last_closure_app_times_ms); + print_stats(2, "Evacuation", _cur_popular_evac_time_ms); + print_stats(2, "Other", pop_other_time); + } + if (!abandoned) { + print_stats(1, "Parallel Time", _cur_collection_par_time_ms); + if (!popular) { + print_par_stats(2, "Update RS (Start)", _par_last_update_rs_start_times_ms, false); + print_par_stats(2, "Update RS", _par_last_update_rs_times_ms); + if (G1RSBarrierUseQueue) + print_par_buffers(3, "Processed Buffers", + _par_last_update_rs_processed_buffers, true); + } + print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms); + print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms); + print_par_stats(2, "Scan-Only Scanning", _par_last_scan_only_times_ms); + print_par_buffers(3, "Scan-Only Regions", + _par_last_scan_only_regions_scanned, true); + print_par_stats(2, "Scan RS", _par_last_scan_rs_times_ms); + print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms); + print_par_stats(2, "Termination", _par_last_termination_times_ms); + print_stats(2, "Other", parallel_other_time); + print_stats(1, "Clear CT", _cur_clear_ct_time_ms); + } + } else { + if (popular) { + print_stats(2, "Update RS", pop_update_rs_time); + if (G1RSBarrierUseQueue) + print_stats(3, "Processed Buffers", + (int)pop_update_rs_processed_buffers); + print_stats(2, "Scan RS", pop_scan_rs_time); + print_stats(2, "Closure App", pop_closure_app_time); + print_stats(2, "Evacuation", _cur_popular_evac_time_ms); + print_stats(2, "Other", pop_other_time); + } + if (!abandoned) { + if (!popular) { + print_stats(1, "Update RS", update_rs_time); + if (G1RSBarrierUseQueue) + print_stats(2, "Processed Buffers", + (int)update_rs_processed_buffers); + } + print_stats(1, "Ext Root Scanning", ext_root_scan_time); + print_stats(1, "Mark Stack Scanning", mark_stack_scan_time); + print_stats(1, "Scan-Only Scanning", scan_only_time); + print_stats(1, "Scan RS", scan_rs_time); + print_stats(1, "Object Copying", obj_copy_time); + } + } + print_stats(1, "Other", other_time_ms); + for (int i = 0; i < _aux_num; ++i) { + if (_cur_aux_times_set[i]) { + char buffer[96]; + sprintf(buffer, "Aux%d", i); + print_stats(1, buffer, _cur_aux_times_ms[i]); + } + } + } + if (PrintGCDetails) + gclog_or_tty->print(" ["); + if (PrintGC || PrintGCDetails) + _g1->print_size_transition(gclog_or_tty, + _cur_collection_pause_used_at_start_bytes, + _g1->used(), _g1->capacity()); + if (PrintGCDetails) + gclog_or_tty->print_cr("]"); + + _all_pause_times_ms->add(elapsed_ms); + summary->record_total_time_ms(elapsed_ms); + summary->record_other_time_ms(other_time_ms); + for (int i = 0; i < _aux_num; ++i) + if (_cur_aux_times_set[i]) + _all_aux_times_ms[i].add(_cur_aux_times_ms[i]); + + // Reset 
marks-between-pauses counter. + _n_marks_since_last_pause = 0; + + // Update the efficiency-since-mark vars. + double proc_ms = elapsed_ms * (double) _parallel_gc_threads; + if (elapsed_ms < MIN_TIMER_GRANULARITY) { + // This usually happens due to the timer not having the required + // granularity. Some Linuxes are the usual culprits. + // We'll just set it to something (arbitrarily) small. + proc_ms = 1.0; + } + double cur_efficiency = (double) freed_bytes / proc_ms; + + bool new_in_marking_window = _in_marking_window; + bool new_in_marking_window_im = false; + if (_should_initiate_conc_mark) { + new_in_marking_window = true; + new_in_marking_window_im = true; + } + + if (in_young_gc_mode()) { + if (_last_full_young_gc) { + set_full_young_gcs(false); + _last_full_young_gc = false; + } + + if ( !_last_young_gc_full ) { + if ( _should_revert_to_full_young_gcs || + _known_garbage_ratio < 0.05 || + (adaptive_young_list_length() && + (get_gc_eff_factor() * cur_efficiency < predict_young_gc_eff())) ) { + set_full_young_gcs(true); + } + } + _should_revert_to_full_young_gcs = false; + + if (_last_young_gc_full && !_during_marking) + _young_gc_eff_seq->add(cur_efficiency); + } + + _short_lived_surv_rate_group->start_adding_regions(); + // do that for any other surv rate groupsx + + // + + if (!popular && !abandoned) { + double pause_time_ms = elapsed_ms; + + size_t diff = 0; + if (_max_pending_cards >= _pending_cards) + diff = _max_pending_cards - _pending_cards; + _pending_card_diff_seq->add((double) diff); + + double cost_per_card_ms = 0.0; + if (_pending_cards > 0) { + cost_per_card_ms = update_rs_time / (double) _pending_cards; + _cost_per_card_ms_seq->add(cost_per_card_ms); + } + + double cost_per_scan_only_region_ms = 0.0; + if (scan_only_regions_scanned > 0.0) { + cost_per_scan_only_region_ms = + scan_only_time / scan_only_regions_scanned; + if (_in_marking_window_im) + _cost_per_scan_only_region_ms_during_cm_seq->add(cost_per_scan_only_region_ms); + else + _cost_per_scan_only_region_ms_seq->add(cost_per_scan_only_region_ms); + } + + size_t cards_scanned = _g1->cards_scanned(); + + double cost_per_entry_ms = 0.0; + if (cards_scanned > 10) { + cost_per_entry_ms = scan_rs_time / (double) cards_scanned; + if (_last_young_gc_full) + _cost_per_entry_ms_seq->add(cost_per_entry_ms); + else + _partially_young_cost_per_entry_ms_seq->add(cost_per_entry_ms); + } + + if (_max_rs_lengths > 0) { + double cards_per_entry_ratio = + (double) cards_scanned / (double) _max_rs_lengths; + if (_last_young_gc_full) + _fully_young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio); + else + _partially_young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio); + } + + size_t rs_length_diff = _max_rs_lengths - _recorded_rs_lengths; + if (rs_length_diff >= 0) + _rs_length_diff_seq->add((double) rs_length_diff); + + size_t copied_bytes = surviving_bytes; + double cost_per_byte_ms = 0.0; + if (copied_bytes > 0) { + cost_per_byte_ms = obj_copy_time / (double) copied_bytes; + if (_in_marking_window) + _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms); + else + _cost_per_byte_ms_seq->add(cost_per_byte_ms); + } + + double all_other_time_ms = pause_time_ms - + (update_rs_time + scan_only_time + scan_rs_time + obj_copy_time + + _mark_closure_time_ms + termination_time); + + double young_other_time_ms = 0.0; + if (_recorded_young_regions > 0) { + young_other_time_ms = + _recorded_young_cset_choice_time_ms + + _recorded_young_free_cset_time_ms; + _young_other_cost_per_region_ms_seq->add(young_other_time_ms / + 
(double) _recorded_young_regions); + } + double non_young_other_time_ms = 0.0; + if (_recorded_non_young_regions > 0) { + non_young_other_time_ms = + _recorded_non_young_cset_choice_time_ms + + _recorded_non_young_free_cset_time_ms; + + _non_young_other_cost_per_region_ms_seq->add(non_young_other_time_ms / + (double) _recorded_non_young_regions); + } + + double constant_other_time_ms = all_other_time_ms - + (young_other_time_ms + non_young_other_time_ms); + _constant_other_time_ms_seq->add(constant_other_time_ms); + + double survival_ratio = 0.0; + if (_bytes_in_collection_set_before_gc > 0) { + survival_ratio = (double) bytes_in_to_space_during_gc() / + (double) _bytes_in_collection_set_before_gc; + } + + _pending_cards_seq->add((double) _pending_cards); + _scanned_cards_seq->add((double) cards_scanned); + _rs_lengths_seq->add((double) _max_rs_lengths); + + double expensive_region_limit_ms = + (double) G1MaxPauseTimeMS - predict_constant_other_time_ms(); + if (expensive_region_limit_ms < 0.0) { + // this means that the other time was predicted to be longer than + // than the max pause time + expensive_region_limit_ms = (double) G1MaxPauseTimeMS; + } + _expensive_region_limit_ms = expensive_region_limit_ms; + + if (PREDICTIONS_VERBOSE) { + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr("PREDICTIONS %1.4lf %d " + "REGIONS %d %d %d %d " + "PENDING_CARDS %d %d " + "CARDS_SCANNED %d %d " + "RS_LENGTHS %d %d " + "SCAN_ONLY_SCAN %1.6lf %1.6lf " + "RS_UPDATE %1.6lf %1.6lf RS_SCAN %1.6lf %1.6lf " + "SURVIVAL_RATIO %1.6lf %1.6lf " + "OBJECT_COPY %1.6lf %1.6lf OTHER_CONSTANT %1.6lf %1.6lf " + "OTHER_YOUNG %1.6lf %1.6lf " + "OTHER_NON_YOUNG %1.6lf %1.6lf " + "VTIME_DIFF %1.6lf TERMINATION %1.6lf " + "ELAPSED %1.6lf %1.6lf ", + _cur_collection_start_sec, + (!_last_young_gc_full) ? 2 : + (last_pause_included_initial_mark) ? 1 : 0, + _recorded_region_num, + _recorded_young_regions, + _recorded_scan_only_regions, + _recorded_non_young_regions, + _predicted_pending_cards, _pending_cards, + _predicted_cards_scanned, cards_scanned, + _predicted_rs_lengths, _max_rs_lengths, + _predicted_scan_only_scan_time_ms, scan_only_time, + _predicted_rs_update_time_ms, update_rs_time, + _predicted_rs_scan_time_ms, scan_rs_time, + _predicted_survival_ratio, survival_ratio, + _predicted_object_copy_time_ms, obj_copy_time, + _predicted_constant_other_time_ms, constant_other_time_ms, + _predicted_young_other_time_ms, young_other_time_ms, + _predicted_non_young_other_time_ms, + non_young_other_time_ms, + _vtime_diff_ms, termination_time, + _predicted_pause_time_ms, elapsed_ms); + } + + if (G1PolicyVerbose > 0) { + gclog_or_tty->print_cr("Pause Time, predicted: %1.4lfms (predicted %s), actual: %1.4lfms", + _predicted_pause_time_ms, + (_within_target) ? 
"within" : "outside", + elapsed_ms); + } + + } + + _in_marking_window = new_in_marking_window; + _in_marking_window_im = new_in_marking_window_im; + _free_regions_at_end_of_collection = _g1->free_regions(); + _scan_only_regions_at_end_of_collection = _g1->young_list_length(); + calculate_young_list_min_length(); + calculate_young_list_target_config(); + + // + + _target_pause_time_ms = -1.0; + + // TODO: calculate tenuring threshold + _tenuring_threshold = MaxTenuringThreshold; +} + +// + +double +G1CollectorPolicy:: +predict_young_collection_elapsed_time_ms(size_t adjustment) { + guarantee( adjustment == 0 || adjustment == 1, "invariant" ); + + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + size_t young_num = g1h->young_list_length(); + if (young_num == 0) + return 0.0; + + young_num += adjustment; + size_t pending_cards = predict_pending_cards(); + size_t rs_lengths = g1h->young_list_sampled_rs_lengths() + + predict_rs_length_diff(); + size_t card_num; + if (full_young_gcs()) + card_num = predict_young_card_num(rs_lengths); + else + card_num = predict_non_young_card_num(rs_lengths); + size_t young_byte_size = young_num * HeapRegion::GrainBytes; + double accum_yg_surv_rate = + _short_lived_surv_rate_group->accum_surv_rate(adjustment); + + size_t bytes_to_copy = + (size_t) (accum_yg_surv_rate * (double) HeapRegion::GrainBytes); + + return + predict_rs_update_time_ms(pending_cards) + + predict_rs_scan_time_ms(card_num) + + predict_object_copy_time_ms(bytes_to_copy) + + predict_young_other_time_ms(young_num) + + predict_constant_other_time_ms(); +} + +double +G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards) { + size_t rs_length = predict_rs_length_diff(); + size_t card_num; + if (full_young_gcs()) + card_num = predict_young_card_num(rs_length); + else + card_num = predict_non_young_card_num(rs_length); + return predict_base_elapsed_time_ms(pending_cards, card_num); +} + +double +G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards, + size_t scanned_cards) { + return + predict_rs_update_time_ms(pending_cards) + + predict_rs_scan_time_ms(scanned_cards) + + predict_constant_other_time_ms(); +} + +double +G1CollectorPolicy::predict_region_elapsed_time_ms(HeapRegion* hr, + bool young) { + size_t rs_length = hr->rem_set()->occupied(); + size_t card_num; + if (full_young_gcs()) + card_num = predict_young_card_num(rs_length); + else + card_num = predict_non_young_card_num(rs_length); + size_t bytes_to_copy = predict_bytes_to_copy(hr); + + double region_elapsed_time_ms = + predict_rs_scan_time_ms(card_num) + + predict_object_copy_time_ms(bytes_to_copy); + + if (young) + region_elapsed_time_ms += predict_young_other_time_ms(1); + else + region_elapsed_time_ms += predict_non_young_other_time_ms(1); + + return region_elapsed_time_ms; +} + +size_t +G1CollectorPolicy::predict_bytes_to_copy(HeapRegion* hr) { + size_t bytes_to_copy; + if (hr->is_marked()) + bytes_to_copy = hr->max_live_bytes(); + else { + guarantee( hr->is_young() && hr->age_in_surv_rate_group() != -1, + "invariant" ); + int age = hr->age_in_surv_rate_group(); + double yg_surv_rate = predict_yg_surv_rate(age); + bytes_to_copy = (size_t) ((double) hr->used() * yg_surv_rate); + } + + return bytes_to_copy; +} + +void +G1CollectorPolicy::start_recording_regions() { + _recorded_rs_lengths = 0; + _recorded_scan_only_regions = 0; + _recorded_young_regions = 0; + _recorded_non_young_regions = 0; + +#if PREDICTIONS_VERBOSE + _predicted_rs_lengths = 0; + _predicted_cards_scanned = 0; + + _recorded_marked_bytes 
= 0; + _recorded_young_bytes = 0; + _predicted_bytes_to_copy = 0; +#endif // PREDICTIONS_VERBOSE +} + +void +G1CollectorPolicy::record_cset_region(HeapRegion* hr, bool young) { + if (young) { + ++_recorded_young_regions; + } else { + ++_recorded_non_young_regions; + } +#if PREDICTIONS_VERBOSE + if (young) { + _recorded_young_bytes += hr->asSpace()->used(); + } else { + _recorded_marked_bytes += hr->max_live_bytes(); + } + _predicted_bytes_to_copy += predict_bytes_to_copy(hr); +#endif // PREDICTIONS_VERBOSE + + size_t rs_length = hr->rem_set()->occupied(); + _recorded_rs_lengths += rs_length; +} + +void +G1CollectorPolicy::record_scan_only_regions(size_t scan_only_length) { + _recorded_scan_only_regions = scan_only_length; +} + +void +G1CollectorPolicy::end_recording_regions() { +#if PREDICTIONS_VERBOSE + _predicted_pending_cards = predict_pending_cards(); + _predicted_rs_lengths = _recorded_rs_lengths + predict_rs_length_diff(); + if (full_young_gcs()) + _predicted_cards_scanned += predict_young_card_num(_predicted_rs_lengths); + else + _predicted_cards_scanned += + predict_non_young_card_num(_predicted_rs_lengths); + _recorded_region_num = _recorded_young_regions + _recorded_non_young_regions; + + _predicted_young_survival_ratio = 0.0; + for (int i = 0; i < _recorded_young_regions; ++i) + _predicted_young_survival_ratio += predict_yg_surv_rate(i); + _predicted_young_survival_ratio /= (double) _recorded_young_regions; + + _predicted_scan_only_scan_time_ms = + predict_scan_only_time_ms(_recorded_scan_only_regions); + _predicted_rs_update_time_ms = + predict_rs_update_time_ms(_g1->pending_card_num()); + _predicted_rs_scan_time_ms = + predict_rs_scan_time_ms(_predicted_cards_scanned); + _predicted_object_copy_time_ms = + predict_object_copy_time_ms(_predicted_bytes_to_copy); + _predicted_constant_other_time_ms = + predict_constant_other_time_ms(); + _predicted_young_other_time_ms = + predict_young_other_time_ms(_recorded_young_regions); + _predicted_non_young_other_time_ms = + predict_non_young_other_time_ms(_recorded_non_young_regions); + + _predicted_pause_time_ms = + _predicted_scan_only_scan_time_ms + + _predicted_rs_update_time_ms + + _predicted_rs_scan_time_ms + + _predicted_object_copy_time_ms + + _predicted_constant_other_time_ms + + _predicted_young_other_time_ms + + _predicted_non_young_other_time_ms; +#endif // PREDICTIONS_VERBOSE +} + +void G1CollectorPolicy::check_if_region_is_too_expensive(double + predicted_time_ms) { + // I don't think we need to do this when in young GC mode since + // marking will be initiated next time we hit the soft limit anyway... 
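+  //
+  // If a single region's predicted time is already over
+  // _expensive_region_limit_ms (which is derived from the pause target),
+  // then: outside young GC mode we switch to fully-young GCs and request a
+  // concurrent mark; in young GC mode we simply arrange to revert to
+  // fully-young GCs, as another partial collection is not worth it.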
+ if (predicted_time_ms > _expensive_region_limit_ms) { + if (!in_young_gc_mode()) { + set_full_young_gcs(true); + _should_initiate_conc_mark = true; + } else + // no point in doing another partial one + _should_revert_to_full_young_gcs = true; + } +} + +// + + +void G1CollectorPolicy::update_recent_gc_times(double end_time_sec, + double elapsed_ms) { + _recent_gc_times_ms->add(elapsed_ms); + _recent_prev_end_times_for_all_gcs_sec->add(end_time_sec); + _prev_collection_pause_end_ms = end_time_sec * 1000.0; +} + +double G1CollectorPolicy::recent_avg_time_for_pauses_ms() { + if (_recent_pause_times_ms->num() == 0) return (double) G1MaxPauseTimeMS; + else return _recent_pause_times_ms->avg(); +} + +double G1CollectorPolicy::recent_avg_time_for_CH_strong_ms() { + if (_recent_CH_strong_roots_times_ms->num() == 0) + return (double)G1MaxPauseTimeMS/3.0; + else return _recent_CH_strong_roots_times_ms->avg(); +} + +double G1CollectorPolicy::recent_avg_time_for_G1_strong_ms() { + if (_recent_G1_strong_roots_times_ms->num() == 0) + return (double)G1MaxPauseTimeMS/3.0; + else return _recent_G1_strong_roots_times_ms->avg(); +} + +double G1CollectorPolicy::recent_avg_time_for_evac_ms() { + if (_recent_evac_times_ms->num() == 0) return (double)G1MaxPauseTimeMS/3.0; + else return _recent_evac_times_ms->avg(); +} + +int G1CollectorPolicy::number_of_recent_gcs() { + assert(_recent_CH_strong_roots_times_ms->num() == + _recent_G1_strong_roots_times_ms->num(), "Sequence out of sync"); + assert(_recent_G1_strong_roots_times_ms->num() == + _recent_evac_times_ms->num(), "Sequence out of sync"); + assert(_recent_evac_times_ms->num() == + _recent_pause_times_ms->num(), "Sequence out of sync"); + assert(_recent_pause_times_ms->num() == + _recent_CS_bytes_used_before->num(), "Sequence out of sync"); + assert(_recent_CS_bytes_used_before->num() == + _recent_CS_bytes_surviving->num(), "Sequence out of sync"); + return _recent_pause_times_ms->num(); +} + +double G1CollectorPolicy::recent_avg_survival_fraction() { + return recent_avg_survival_fraction_work(_recent_CS_bytes_surviving, + _recent_CS_bytes_used_before); +} + +double G1CollectorPolicy::last_survival_fraction() { + return last_survival_fraction_work(_recent_CS_bytes_surviving, + _recent_CS_bytes_used_before); +} + +double +G1CollectorPolicy::recent_avg_survival_fraction_work(TruncatedSeq* surviving, + TruncatedSeq* before) { + assert(surviving->num() == before->num(), "Sequence out of sync"); + if (before->sum() > 0.0) { + double recent_survival_rate = surviving->sum() / before->sum(); + // We exempt parallel collection from this check because Alloc Buffer + // fragmentation can produce negative collections. + // Further, we're now always doing parallel collection. But I'm still + // leaving this here as a placeholder for a more precise assertion later. + // (DLD, 10/05.) + assert((true || ParallelGCThreads > 0) || + _g1->evacuation_failed() || + recent_survival_rate <= 1.0, "Or bad frac"); + return recent_survival_rate; + } else { + return 1.0; // Be conservative. + } +} + +double +G1CollectorPolicy::last_survival_fraction_work(TruncatedSeq* surviving, + TruncatedSeq* before) { + assert(surviving->num() == before->num(), "Sequence out of sync"); + if (surviving->num() > 0 && before->last() > 0.0) { + double last_survival_rate = surviving->last() / before->last(); + // We exempt parallel collection from this check because Alloc Buffer + // fragmentation can produce negative collections. + // Further, we're now always doing parallel collection. 
But I'm still + // leaving this here as a placeholder for a more precise assertion later. + // (DLD, 10/05.) + assert((true || ParallelGCThreads > 0) || + last_survival_rate <= 1.0, "Or bad frac"); + return last_survival_rate; + } else { + return 1.0; + } +} + +static const int survival_min_obs = 5; +static double survival_min_obs_limits[] = { 0.9, 0.7, 0.5, 0.3, 0.1 }; +static const double min_survival_rate = 0.1; + +double +G1CollectorPolicy::conservative_avg_survival_fraction_work(double avg, + double latest) { + double res = avg; + if (number_of_recent_gcs() < survival_min_obs) { + res = MAX2(res, survival_min_obs_limits[number_of_recent_gcs()]); + } + res = MAX2(res, latest); + res = MAX2(res, min_survival_rate); + // In the parallel case, LAB fragmentation can produce "negative + // collections"; so can evac failure. Cap at 1.0 + res = MIN2(res, 1.0); + return res; +} + +size_t G1CollectorPolicy::expansion_amount() { + if ((int)(recent_avg_pause_time_ratio() * 100.0) > G1GCPct) { + // We will double the existing space, or take G1ExpandByPctOfAvail % of + // the available expansion space, whichever is smaller, bounded below + // by a minimum expansion (unless that's all that's left.) + const size_t min_expand_bytes = 1*M; + size_t reserved_bytes = _g1->g1_reserved_obj_bytes(); + size_t committed_bytes = _g1->capacity(); + size_t uncommitted_bytes = reserved_bytes - committed_bytes; + size_t expand_bytes; + size_t expand_bytes_via_pct = + uncommitted_bytes * G1ExpandByPctOfAvail / 100; + expand_bytes = MIN2(expand_bytes_via_pct, committed_bytes); + expand_bytes = MAX2(expand_bytes, min_expand_bytes); + expand_bytes = MIN2(expand_bytes, uncommitted_bytes); + if (G1PolicyVerbose > 1) { + gclog_or_tty->print("Decided to expand: ratio = %5.2f, " + "committed = %d%s, uncommited = %d%s, via pct = %d%s.\n" + " Answer = %d.\n", + recent_avg_pause_time_ratio(), + byte_size_in_proper_unit(committed_bytes), + proper_unit_for_byte_size(committed_bytes), + byte_size_in_proper_unit(uncommitted_bytes), + proper_unit_for_byte_size(uncommitted_bytes), + byte_size_in_proper_unit(expand_bytes_via_pct), + proper_unit_for_byte_size(expand_bytes_via_pct), + byte_size_in_proper_unit(expand_bytes), + proper_unit_for_byte_size(expand_bytes)); + } + return expand_bytes; + } else { + return 0; + } +} + +void G1CollectorPolicy::note_start_of_mark_thread() { + _mark_thread_startup_sec = os::elapsedTime(); +} + +class CountCSClosure: public HeapRegionClosure { + G1CollectorPolicy* _g1_policy; +public: + CountCSClosure(G1CollectorPolicy* g1_policy) : + _g1_policy(g1_policy) {} + bool doHeapRegion(HeapRegion* r) { + _g1_policy->_bytes_in_collection_set_before_gc += r->used(); + return false; + } +}; + +void G1CollectorPolicy::count_CS_bytes_used() { + CountCSClosure cs_closure(this); + _g1->collection_set_iterate(&cs_closure); +} + +static void print_indent(int level) { + for (int j = 0; j < level+1; ++j) + gclog_or_tty->print(" "); +} + +void G1CollectorPolicy::print_summary (int level, + const char* str, + NumberSeq* seq) const { + double sum = seq->sum(); + print_indent(level); + gclog_or_tty->print_cr("%-24s = %8.2lf s (avg = %8.2lf ms)", + str, sum / 1000.0, seq->avg()); +} + +void G1CollectorPolicy::print_summary_sd (int level, + const char* str, + NumberSeq* seq) const { + print_summary(level, str, seq); + print_indent(level + 5); + gclog_or_tty->print_cr("(num = %5d, std dev = %8.2lf ms, max = %8.2lf ms)", + seq->num(), seq->sd(), seq->maximum()); +} + +void G1CollectorPolicy::check_other_times(int level, + 
NumberSeq* other_times_ms, + NumberSeq* calc_other_times_ms) const { + bool should_print = false; + + double max_sum = MAX2(fabs(other_times_ms->sum()), + fabs(calc_other_times_ms->sum())); + double min_sum = MIN2(fabs(other_times_ms->sum()), + fabs(calc_other_times_ms->sum())); + double sum_ratio = max_sum / min_sum; + if (sum_ratio > 1.1) { + should_print = true; + print_indent(level + 1); + gclog_or_tty->print_cr("## CALCULATED OTHER SUM DOESN'T MATCH RECORDED ###"); + } + + double max_avg = MAX2(fabs(other_times_ms->avg()), + fabs(calc_other_times_ms->avg())); + double min_avg = MIN2(fabs(other_times_ms->avg()), + fabs(calc_other_times_ms->avg())); + double avg_ratio = max_avg / min_avg; + if (avg_ratio > 1.1) { + should_print = true; + print_indent(level + 1); + gclog_or_tty->print_cr("## CALCULATED OTHER AVG DOESN'T MATCH RECORDED ###"); + } + + if (other_times_ms->sum() < -0.01) { + print_indent(level + 1); + gclog_or_tty->print_cr("## RECORDED OTHER SUM IS NEGATIVE ###"); + } + + if (other_times_ms->avg() < -0.01) { + print_indent(level + 1); + gclog_or_tty->print_cr("## RECORDED OTHER AVG IS NEGATIVE ###"); + } + + if (calc_other_times_ms->sum() < -0.01) { + should_print = true; + print_indent(level + 1); + gclog_or_tty->print_cr("## CALCULATED OTHER SUM IS NEGATIVE ###"); + } + + if (calc_other_times_ms->avg() < -0.01) { + should_print = true; + print_indent(level + 1); + gclog_or_tty->print_cr("## CALCULATED OTHER AVG IS NEGATIVE ###"); + } + + if (should_print) + print_summary(level, "Other(Calc)", calc_other_times_ms); +} + +void G1CollectorPolicy::print_summary(PauseSummary* summary) const { + bool parallel = ParallelGCThreads > 0; + MainBodySummary* body_summary = summary->main_body_summary(); + PopPreambleSummary* preamble_summary = summary->pop_preamble_summary(); + + if (summary->get_total_seq()->num() > 0) { + print_summary_sd(0, + (preamble_summary == NULL) ? 
"Non-Popular Pauses" : + "Popular Pauses", + summary->get_total_seq()); + if (preamble_summary != NULL) { + print_summary(1, "Popularity Preamble", + preamble_summary->get_pop_preamble_seq()); + print_summary(2, "Update RS", preamble_summary->get_pop_update_rs_seq()); + print_summary(2, "Scan RS", preamble_summary->get_pop_scan_rs_seq()); + print_summary(2, "Closure App", + preamble_summary->get_pop_closure_app_seq()); + print_summary(2, "Evacuation", + preamble_summary->get_pop_evacuation_seq()); + print_summary(2, "Other", preamble_summary->get_pop_other_seq()); + { + NumberSeq* other_parts[] = { + preamble_summary->get_pop_update_rs_seq(), + preamble_summary->get_pop_scan_rs_seq(), + preamble_summary->get_pop_closure_app_seq(), + preamble_summary->get_pop_evacuation_seq() + }; + NumberSeq calc_other_times_ms(preamble_summary->get_pop_preamble_seq(), + 4, other_parts); + check_other_times(2, preamble_summary->get_pop_other_seq(), + &calc_other_times_ms); + } + } + if (body_summary != NULL) { + print_summary(1, "SATB Drain", body_summary->get_satb_drain_seq()); + if (parallel) { + print_summary(1, "Parallel Time", body_summary->get_parallel_seq()); + print_summary(2, "Update RS", body_summary->get_update_rs_seq()); + print_summary(2, "Ext Root Scanning", + body_summary->get_ext_root_scan_seq()); + print_summary(2, "Mark Stack Scanning", + body_summary->get_mark_stack_scan_seq()); + print_summary(2, "Scan-Only Scanning", + body_summary->get_scan_only_seq()); + print_summary(2, "Scan RS", body_summary->get_scan_rs_seq()); + print_summary(2, "Object Copy", body_summary->get_obj_copy_seq()); + print_summary(2, "Termination", body_summary->get_termination_seq()); + print_summary(2, "Other", body_summary->get_parallel_other_seq()); + { + NumberSeq* other_parts[] = { + body_summary->get_update_rs_seq(), + body_summary->get_ext_root_scan_seq(), + body_summary->get_mark_stack_scan_seq(), + body_summary->get_scan_only_seq(), + body_summary->get_scan_rs_seq(), + body_summary->get_obj_copy_seq(), + body_summary->get_termination_seq() + }; + NumberSeq calc_other_times_ms(body_summary->get_parallel_seq(), + 7, other_parts); + check_other_times(2, body_summary->get_parallel_other_seq(), + &calc_other_times_ms); + } + print_summary(1, "Mark Closure", body_summary->get_mark_closure_seq()); + print_summary(1, "Clear CT", body_summary->get_clear_ct_seq()); + } else { + print_summary(1, "Update RS", body_summary->get_update_rs_seq()); + print_summary(1, "Ext Root Scanning", + body_summary->get_ext_root_scan_seq()); + print_summary(1, "Mark Stack Scanning", + body_summary->get_mark_stack_scan_seq()); + print_summary(1, "Scan-Only Scanning", + body_summary->get_scan_only_seq()); + print_summary(1, "Scan RS", body_summary->get_scan_rs_seq()); + print_summary(1, "Object Copy", body_summary->get_obj_copy_seq()); + } + } + print_summary(1, "Other", summary->get_other_seq()); + { + NumberSeq calc_other_times_ms; + if (body_summary != NULL) { + // not abandoned + if (parallel) { + // parallel + NumberSeq* other_parts[] = { + body_summary->get_satb_drain_seq(), + (preamble_summary == NULL) ? NULL : + preamble_summary->get_pop_preamble_seq(), + body_summary->get_parallel_seq(), + body_summary->get_clear_ct_seq() + }; + calc_other_times_ms = NumberSeq (summary->get_total_seq(), + 4, other_parts); + } else { + // serial + NumberSeq* other_parts[] = { + body_summary->get_satb_drain_seq(), + (preamble_summary == NULL) ? 
NULL : + preamble_summary->get_pop_preamble_seq(), + body_summary->get_update_rs_seq(), + body_summary->get_ext_root_scan_seq(), + body_summary->get_mark_stack_scan_seq(), + body_summary->get_scan_only_seq(), + body_summary->get_scan_rs_seq(), + body_summary->get_obj_copy_seq() + }; + calc_other_times_ms = NumberSeq(summary->get_total_seq(), + 8, other_parts); + } + } else { + // abandoned + NumberSeq* other_parts[] = { + (preamble_summary == NULL) ? NULL : + preamble_summary->get_pop_preamble_seq() + }; + calc_other_times_ms = NumberSeq(summary->get_total_seq(), + 1, other_parts); + } + check_other_times(1, summary->get_other_seq(), &calc_other_times_ms); + } + } else { + print_indent(0); + gclog_or_tty->print_cr("none"); + } + gclog_or_tty->print_cr(""); +} + +void +G1CollectorPolicy::print_abandoned_summary(PauseSummary* non_pop_summary, + PauseSummary* pop_summary) const { + bool printed = false; + if (non_pop_summary->get_total_seq()->num() > 0) { + printed = true; + print_summary(non_pop_summary); + } + if (pop_summary->get_total_seq()->num() > 0) { + printed = true; + print_summary(pop_summary); + } + + if (!printed) { + print_indent(0); + gclog_or_tty->print_cr("none"); + gclog_or_tty->print_cr(""); + } +} + +void G1CollectorPolicy::print_tracing_info() const { + if (TraceGen0Time) { + gclog_or_tty->print_cr("ALL PAUSES"); + print_summary_sd(0, "Total", _all_pause_times_ms); + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr(" Full Young GC Pauses: %8d", _full_young_pause_num); + gclog_or_tty->print_cr(" Partial Young GC Pauses: %8d", _partial_young_pause_num); + gclog_or_tty->print_cr(""); + + gclog_or_tty->print_cr("NON-POPULAR PAUSES"); + print_summary(_non_pop_summary); + + gclog_or_tty->print_cr("POPULAR PAUSES"); + print_summary(_pop_summary); + + gclog_or_tty->print_cr("ABANDONED PAUSES"); + print_abandoned_summary(_non_pop_abandoned_summary, + _pop_abandoned_summary); + + gclog_or_tty->print_cr("MISC"); + print_summary_sd(0, "Stop World", _all_stop_world_times_ms); + print_summary_sd(0, "Yields", _all_yield_times_ms); + for (int i = 0; i < _aux_num; ++i) { + if (_all_aux_times_ms[i].num() > 0) { + char buffer[96]; + sprintf(buffer, "Aux%d", i); + print_summary_sd(0, buffer, &_all_aux_times_ms[i]); + } + } + + size_t all_region_num = _region_num_young + _region_num_tenured; + gclog_or_tty->print_cr(" New Regions %8d, Young %8d (%6.2lf%%), " + "Tenured %8d (%6.2lf%%)", + all_region_num, + _region_num_young, + (double) _region_num_young / (double) all_region_num * 100.0, + _region_num_tenured, + (double) _region_num_tenured / (double) all_region_num * 100.0); + + if (!G1RSBarrierUseQueue) { + gclog_or_tty->print_cr("Of %d times conc refinement was enabled, %d (%7.2f%%) " + "did zero traversals.", + _conc_refine_enabled, _conc_refine_zero_traversals, + _conc_refine_enabled > 0 ? + 100.0 * (float)_conc_refine_zero_traversals/ + (float)_conc_refine_enabled : 0.0); + gclog_or_tty->print_cr(" Max # of traversals = %d.", + _conc_refine_max_traversals); + gclog_or_tty->print_cr(""); + } + } + if (TraceGen1Time) { + if (_all_full_gc_times_ms->num() > 0) { + gclog_or_tty->print("\n%4d full_gcs: total time = %8.2f s", + _all_full_gc_times_ms->num(), + _all_full_gc_times_ms->sum() / 1000.0); + gclog_or_tty->print_cr(" (avg = %8.2fms).", _all_full_gc_times_ms->avg()); + gclog_or_tty->print_cr(" [std. 
dev = %8.2f ms, max = %8.2f ms]", + _all_full_gc_times_ms->sd(), + _all_full_gc_times_ms->maximum()); + } + } +} + +void G1CollectorPolicy::print_yg_surv_rate_info() const { +#ifndef PRODUCT + _short_lived_surv_rate_group->print_surv_rate_summary(); + // add this call for any other surv rate groups +#endif // PRODUCT +} + +void G1CollectorPolicy::update_conc_refine_data() { + unsigned traversals = _g1->concurrent_g1_refine()->disable(); + if (traversals == 0) _conc_refine_zero_traversals++; + _conc_refine_max_traversals = MAX2(_conc_refine_max_traversals, + (size_t)traversals); + + if (G1PolicyVerbose > 1) + gclog_or_tty->print_cr("Did a CR traversal series: %d traversals.", traversals); + double multiplier = 1.0; + if (traversals == 0) { + multiplier = 4.0; + } else if (traversals > (size_t)G1ConcRefineTargTraversals) { + multiplier = 1.0/1.5; + } else if (traversals < (size_t)G1ConcRefineTargTraversals) { + multiplier = 1.5; + } + if (G1PolicyVerbose > 1) { + gclog_or_tty->print_cr(" Multiplier = %7.2f.", multiplier); + gclog_or_tty->print(" Delta went from %d regions to ", + _conc_refine_current_delta); + } + _conc_refine_current_delta = + MIN2(_g1->n_regions(), + (size_t)(_conc_refine_current_delta * multiplier)); + _conc_refine_current_delta = + MAX2(_conc_refine_current_delta, (size_t)1); + if (G1PolicyVerbose > 1) { + gclog_or_tty->print_cr("%d regions.", _conc_refine_current_delta); + } + _conc_refine_enabled++; +} + +void G1CollectorPolicy::set_single_region_collection_set(HeapRegion* hr) { + assert(collection_set() == NULL, "Must be no current CS."); + _collection_set_size = 0; + _collection_set_bytes_used_before = 0; + add_to_collection_set(hr); + count_CS_bytes_used(); +} + +bool +G1CollectorPolicy::should_add_next_region_to_young_list() { + assert(in_young_gc_mode(), "should be in young GC mode"); + bool ret; + size_t young_list_length = _g1->young_list_length(); + + if (young_list_length < _young_list_target_length) { + ret = true; + ++_region_num_young; + } else { + ret = false; + ++_region_num_tenured; + } + + return ret; +} + +#ifndef PRODUCT +// for debugging, bit of a hack... +static char* +region_num_to_mbs(int length) { + static char buffer[64]; + double bytes = (double) (length * HeapRegion::GrainBytes); + double mbs = bytes / (double) (1024 * 1024); + sprintf(buffer, "%7.2lfMB", mbs); + return buffer; +} +#endif // PRODUCT + +void +G1CollectorPolicy::checkpoint_conc_overhead() { + double conc_overhead = 0.0; + if (G1AccountConcurrentOverhead) + conc_overhead = COTracker::totalPredConcOverhead(); + _mmu_tracker->update_conc_overhead(conc_overhead); +#if 0 + gclog_or_tty->print(" CO %1.4lf TARGET %1.4lf", + conc_overhead, _mmu_tracker->max_gc_time()); +#endif +} + + +uint G1CollectorPolicy::max_regions(int purpose) { + switch (purpose) { + case GCAllocForSurvived: + return G1MaxSurvivorRegions; + case GCAllocForTenured: + return UINT_MAX; + default: + return UINT_MAX; + }; +} + +void +G1CollectorPolicy_BestRegionsFirst:: +set_single_region_collection_set(HeapRegion* hr) { + G1CollectorPolicy::set_single_region_collection_set(hr); + _collectionSetChooser->removeRegion(hr); +} + + +bool +G1CollectorPolicy_BestRegionsFirst::should_do_collection_pause(size_t + word_size) { + assert(_g1->regions_accounted_for(), "Region leakage!"); + // Initiate a pause when we reach the steady-state "used" target. 
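
update_conc_refine_data above steers the concurrent-refinement delta multiplicatively: quadruple it when a refinement series managed no traversals, shrink it by 1.5 when it exceeded the target traversal count, grow it by 1.5 when it fell short, and clamp the result to [1, n_regions]. A compact, self-contained restatement of that one control step (the target and region count in main() are arbitrary):

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    // One adjustment step for the refinement delta; 'target' stands in for
    // G1ConcRefineTargTraversals.
    std::size_t adjust_delta(std::size_t delta, std::size_t traversals,
                             std::size_t target, std::size_t n_regions) {
      double multiplier = 1.0;
      if (traversals == 0)          multiplier = 4.0;        // did no work: grow aggressively
      else if (traversals > target) multiplier = 1.0 / 1.5;  // overshot: shrink
      else if (traversals < target) multiplier = 1.5;        // undershot: grow gently
      std::size_t next = (std::size_t)(delta * multiplier);
      next = std::min(next, n_regions);
      return std::max(next, (std::size_t)1);
    }

    int main() {
      std::size_t delta = 16;
      std::size_t traversal_samples[] = {0, 3, 12, 5};
      for (std::size_t t : traversal_samples) {
        delta = adjust_delta(delta, t, /*target=*/5, /*n_regions=*/1024);
        std::printf("traversals=%zu -> delta=%zu\n", t, delta);
      }
      return 0;
    }
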
+ size_t used_hard = (_g1->capacity() / 100) * G1SteadyStateUsed; + size_t used_soft = + MAX2((_g1->capacity() / 100) * (G1SteadyStateUsed - G1SteadyStateUsedDelta), + used_hard/2); + size_t used = _g1->used(); + + double max_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0; + + size_t young_list_length = _g1->young_list_length(); + bool reached_target_length = young_list_length >= _young_list_target_length; + + if (in_young_gc_mode()) { + if (reached_target_length) { + assert( young_list_length > 0 && _g1->young_list_length() > 0, + "invariant" ); + _target_pause_time_ms = max_pause_time_ms; + return true; + } + } else { + guarantee( false, "should not reach here" ); + } + + return false; +} + +#ifndef PRODUCT +class HRSortIndexIsOKClosure: public HeapRegionClosure { + CollectionSetChooser* _chooser; +public: + HRSortIndexIsOKClosure(CollectionSetChooser* chooser) : + _chooser(chooser) {} + + bool doHeapRegion(HeapRegion* r) { + if (!r->continuesHumongous()) { + assert(_chooser->regionProperlyOrdered(r), "Ought to be."); + } + return false; + } +}; + +bool G1CollectorPolicy_BestRegionsFirst::assertMarkedBytesDataOK() { + HRSortIndexIsOKClosure cl(_collectionSetChooser); + _g1->heap_region_iterate(&cl); + return true; +} +#endif + +void +G1CollectorPolicy_BestRegionsFirst:: +record_collection_pause_start(double start_time_sec, size_t start_used) { + G1CollectorPolicy::record_collection_pause_start(start_time_sec, start_used); +} + +class NextNonCSElemFinder: public HeapRegionClosure { + HeapRegion* _res; +public: + NextNonCSElemFinder(): _res(NULL) {} + bool doHeapRegion(HeapRegion* r) { + if (!r->in_collection_set()) { + _res = r; + return true; + } else { + return false; + } + } + HeapRegion* res() { return _res; } +}; + +class KnownGarbageClosure: public HeapRegionClosure { + CollectionSetChooser* _hrSorted; + +public: + KnownGarbageClosure(CollectionSetChooser* hrSorted) : + _hrSorted(hrSorted) + {} + + bool doHeapRegion(HeapRegion* r) { + // We only include humongous regions in collection + // sets when concurrent mark shows that their contained object is + // unreachable. + + // Do we have any marking information for this region? + if (r->is_marked()) { + // We don't include humongous regions in collection + // sets because we collect them immediately at the end of a marking + // cycle. We also don't include young regions because we *must* + // include them in the next collection pause. 
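
The used_hard / used_soft computation above is plain percentage-of-capacity arithmetic. A worked example with assumed values for the G1SteadyStateUsed and G1SteadyStateUsedDelta flags (their real defaults may differ):

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    int main() {
      const std::size_t M = 1024 * 1024;
      std::size_t capacity = 1024 * M;          // committed heap: 1 GB (assumed)
      unsigned steady_state_used = 50;          // assumed G1SteadyStateUsed (%)
      unsigned steady_state_used_delta = 10;    // assumed G1SteadyStateUsedDelta (%)

      std::size_t used_hard = (capacity / 100) * steady_state_used;
      std::size_t used_soft = std::max((capacity / 100) *
                                       (steady_state_used - steady_state_used_delta),
                                       used_hard / 2);
      // Roughly 50% and 40% of the committed heap with these settings.
      std::printf("used_hard = %zu MB, used_soft = %zu MB\n",
                  used_hard / M, used_soft / M);
      return 0;
    }
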
+ if (!r->isHumongous() && !r->is_young()) { + _hrSorted->addMarkedHeapRegion(r); + } + } + return false; + } +}; + +class ParKnownGarbageHRClosure: public HeapRegionClosure { + CollectionSetChooser* _hrSorted; + jint _marked_regions_added; + jint _chunk_size; + jint _cur_chunk_idx; + jint _cur_chunk_end; // Cur chunk [_cur_chunk_idx, _cur_chunk_end) + int _worker; + int _invokes; + + void get_new_chunk() { + _cur_chunk_idx = _hrSorted->getParMarkedHeapRegionChunk(_chunk_size); + _cur_chunk_end = _cur_chunk_idx + _chunk_size; + } + void add_region(HeapRegion* r) { + if (_cur_chunk_idx == _cur_chunk_end) { + get_new_chunk(); + } + assert(_cur_chunk_idx < _cur_chunk_end, "postcondition"); + _hrSorted->setMarkedHeapRegion(_cur_chunk_idx, r); + _marked_regions_added++; + _cur_chunk_idx++; + } + +public: + ParKnownGarbageHRClosure(CollectionSetChooser* hrSorted, + jint chunk_size, + int worker) : + _hrSorted(hrSorted), _chunk_size(chunk_size), _worker(worker), + _marked_regions_added(0), _cur_chunk_idx(0), _cur_chunk_end(0), + _invokes(0) + {} + + bool doHeapRegion(HeapRegion* r) { + // We only include humongous regions in collection + // sets when concurrent mark shows that their contained object is + // unreachable. + _invokes++; + + // Do we have any marking information for this region? + if (r->is_marked()) { + // We don't include humongous regions in collection + // sets because we collect them immediately at the end of a marking + // cycle. + // We also do not include young regions in collection sets + if (!r->isHumongous() && !r->is_young()) { + add_region(r); + } + } + return false; + } + jint marked_regions_added() { return _marked_regions_added; } + int invokes() { return _invokes; } +}; + +class ParKnownGarbageTask: public AbstractGangTask { + CollectionSetChooser* _hrSorted; + jint _chunk_size; + G1CollectedHeap* _g1; +public: + ParKnownGarbageTask(CollectionSetChooser* hrSorted, jint chunk_size) : + AbstractGangTask("ParKnownGarbageTask"), + _hrSorted(hrSorted), _chunk_size(chunk_size), + _g1(G1CollectedHeap::heap()) + {} + + void work(int i) { + ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted, _chunk_size, i); + // Back to zero for the claim value. 
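
ParKnownGarbageHRClosure above avoids per-region synchronization by having each worker claim chunks of _chunk_size consecutive slots in the chooser's pre-sized array (via getParMarkedHeapRegionChunk) and then fill its chunk privately. A minimal sketch of that claiming idiom, with a plain atomic counter standing in for the CollectionSetChooser; this illustrates the pattern, it is not the HotSpot code:

    #include <atomic>
    #include <cstdio>
    #include <thread>
    #include <vector>

    std::atomic<int> next_free{0};            // next unclaimed slot in the shared array
    std::vector<int> shared_slots(1000, -1);  // pre-sized destination array

    // Claim 'chunk_size' consecutive slots; returns the first index of the chunk.
    int claim_chunk(int chunk_size) {
      return next_free.fetch_add(chunk_size);
    }

    void worker(int id, int chunk_size, int items_to_add) {
      int idx = 0, end = 0;
      for (int n = 0; n < items_to_add; n++) {
        if (idx == end) {                     // current chunk exhausted: claim another
          idx = claim_chunk(chunk_size);
          end = idx + chunk_size;
        }
        shared_slots[idx++] = id;             // "add a region" into a privately owned slot
      }
    }

    int main() {
      std::vector<std::thread> pool;
      for (int i = 0; i < 4; i++)
        pool.emplace_back(worker, i, /*chunk_size=*/8, /*items_to_add=*/100);
      for (std::thread& t : pool) t.join();
      std::printf("slots claimed: %d\n", next_free.load());
      return 0;
    }
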
+ _g1->heap_region_par_iterate_chunked(&parKnownGarbageCl, i, 0); + jint regions_added = parKnownGarbageCl.marked_regions_added(); + _hrSorted->incNumMarkedHeapRegions(regions_added); + if (G1PrintParCleanupStats) { + gclog_or_tty->print(" Thread %d called %d times, added %d regions to list.\n", + i, parKnownGarbageCl.invokes(), regions_added); + } + } +}; + +void +G1CollectorPolicy_BestRegionsFirst:: +record_concurrent_mark_cleanup_end(size_t freed_bytes, + size_t max_live_bytes) { + double start; + if (G1PrintParCleanupStats) start = os::elapsedTime(); + record_concurrent_mark_cleanup_end_work1(freed_bytes, max_live_bytes); + + _collectionSetChooser->clearMarkedHeapRegions(); + double clear_marked_end; + if (G1PrintParCleanupStats) { + clear_marked_end = os::elapsedTime(); + gclog_or_tty->print_cr(" clear marked regions + work1: %8.3f ms.", + (clear_marked_end - start)*1000.0); + } + if (ParallelGCThreads > 0) { + const size_t OverpartitionFactor = 4; + const size_t MinChunkSize = 8; + const size_t ChunkSize = + MAX2(_g1->n_regions() / (ParallelGCThreads * OverpartitionFactor), + MinChunkSize); + _collectionSetChooser->prepareForAddMarkedHeapRegionsPar(_g1->n_regions(), + ChunkSize); + ParKnownGarbageTask parKnownGarbageTask(_collectionSetChooser, + (int) ChunkSize); + _g1->workers()->run_task(&parKnownGarbageTask); + } else { + KnownGarbageClosure knownGarbagecl(_collectionSetChooser); + _g1->heap_region_iterate(&knownGarbagecl); + } + double known_garbage_end; + if (G1PrintParCleanupStats) { + known_garbage_end = os::elapsedTime(); + gclog_or_tty->print_cr(" compute known garbage: %8.3f ms.", + (known_garbage_end - clear_marked_end)*1000.0); + } + _collectionSetChooser->sortMarkedHeapRegions(); + double sort_end; + if (G1PrintParCleanupStats) { + sort_end = os::elapsedTime(); + gclog_or_tty->print_cr(" sorting: %8.3f ms.", + (sort_end - known_garbage_end)*1000.0); + } + + record_concurrent_mark_cleanup_end_work2(); + double work2_end; + if (G1PrintParCleanupStats) { + work2_end = os::elapsedTime(); + gclog_or_tty->print_cr(" work2: %8.3f ms.", + (work2_end - sort_end)*1000.0); + } +} + +// Add the heap region to the collection set and return the conservative +// estimate of the number of live bytes. +void G1CollectorPolicy:: +add_to_collection_set(HeapRegion* hr) { + if (G1TraceRegions) { + gclog_or_tty->print_cr("added region to cset %d:["PTR_FORMAT", "PTR_FORMAT"], " + "top "PTR_FORMAT", young %s", + hr->hrs_index(), hr->bottom(), hr->end(), + hr->top(), (hr->is_young()) ? "YES" : "NO"); + } + + if (_g1->mark_in_progress()) + _g1->concurrent_mark()->registerCSetRegion(hr); + + assert(!hr->in_collection_set(), + "should not already be in the CSet"); + hr->set_in_collection_set(true); + hr->set_next_in_collection_set(_collection_set); + _collection_set = hr; + _collection_set_size++; + _collection_set_bytes_used_before += hr->used(); +} + +void +G1CollectorPolicy_BestRegionsFirst:: +choose_collection_set(HeapRegion* pop_region) { + double non_young_start_time_sec; + start_recording_regions(); + + if (pop_region != NULL) { + _target_pause_time_ms = (double) G1MaxPauseTimeMS; + } else { + guarantee(_target_pause_time_ms > -1.0, + "_target_pause_time_ms should have been set!"); + } + + // pop region is either null (and so is CS), or else it *is* the CS. 
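
The ChunkSize formula above over-partitions the region array, handing out roughly OverpartitionFactor chunks per GC worker (presumably for load balancing), while the MinChunkSize floor keeps claiming overhead bounded on small heaps. A worked example with assumed region and worker counts:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    int main() {
      const std::size_t OverpartitionFactor = 4;
      const std::size_t MinChunkSize = 8;
      std::size_t n_regions = 2048;   // assumed number of heap regions
      std::size_t workers   = 8;      // assumed ParallelGCThreads

      std::size_t chunk = std::max(n_regions / (workers * OverpartitionFactor),
                                   MinChunkSize);
      std::printf("chunk = %zu regions, about %zu chunks to claim\n",
                  chunk, n_regions / chunk);   // 64 regions per chunk, 32 chunks
      return 0;
    }
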
+ assert(_collection_set == pop_region, "Precondition"); + + double base_time_ms = predict_base_elapsed_time_ms(_pending_cards); + double predicted_pause_time_ms = base_time_ms; + + double target_time_ms = _target_pause_time_ms; + double time_remaining_ms = target_time_ms - base_time_ms; + + // the 10% and 50% values are arbitrary... + if (time_remaining_ms < 0.10*target_time_ms) { + time_remaining_ms = 0.50 * target_time_ms; + _within_target = false; + } else { + _within_target = true; + } + + // We figure out the number of bytes available for future to-space. + // For new regions without marking information, we must assume the + // worst-case of complete survival. If we have marking information for a + // region, we can bound the amount of live data. We can add a number of + // such regions, as long as the sum of the live data bounds does not + // exceed the available evacuation space. + size_t max_live_bytes = _g1->free_regions() * HeapRegion::GrainBytes; + + size_t expansion_bytes = + _g1->expansion_regions() * HeapRegion::GrainBytes; + + if (pop_region == NULL) { + _collection_set_bytes_used_before = 0; + _collection_set_size = 0; + } + + // Adjust for expansion and slop. + max_live_bytes = max_live_bytes + expansion_bytes; + + assert(pop_region != NULL || _g1->regions_accounted_for(), "Region leakage!"); + + HeapRegion* hr; + if (in_young_gc_mode()) { + double young_start_time_sec = os::elapsedTime(); + + if (G1PolicyVerbose > 0) { + gclog_or_tty->print_cr("Adding %d young regions to the CSet", + _g1->young_list_length()); + } + _young_cset_length = 0; + _last_young_gc_full = full_young_gcs() ? true : false; + if (_last_young_gc_full) + ++_full_young_pause_num; + else + ++_partial_young_pause_num; + hr = _g1->pop_region_from_young_list(); + while (hr != NULL) { + + assert( hr->young_index_in_cset() == -1, "invariant" ); + assert( hr->age_in_surv_rate_group() != -1, "invariant" ); + hr->set_young_index_in_cset((int) _young_cset_length); + + ++_young_cset_length; + double predicted_time_ms = predict_region_elapsed_time_ms(hr, true); + time_remaining_ms -= predicted_time_ms; + predicted_pause_time_ms += predicted_time_ms; + if (hr == pop_region) { + // The popular region was young. Skip over it. + assert(hr->in_collection_set(), "It's the pop region."); + } else { + assert(!hr->in_collection_set(), "It's not the pop region."); + add_to_collection_set(hr); + record_cset_region(hr, true); + } + max_live_bytes -= MIN2(hr->max_live_bytes(), max_live_bytes); + if (G1PolicyVerbose > 0) { + gclog_or_tty->print_cr(" Added [" PTR_FORMAT ", " PTR_FORMAT") to CS.", + hr->bottom(), hr->end()); + gclog_or_tty->print_cr(" (" SIZE_FORMAT " KB left in heap.)", + max_live_bytes/K); + } + hr = _g1->pop_region_from_young_list(); + } + + record_scan_only_regions(_g1->young_list_scan_only_length()); + + double young_end_time_sec = os::elapsedTime(); + _recorded_young_cset_choice_time_ms = + (young_end_time_sec - young_start_time_sec) * 1000.0; + + non_young_start_time_sec = os::elapsedTime(); + + if (_young_cset_length > 0 && _last_young_gc_full) { + // don't bother adding more regions... + goto choose_collection_set_end; + } + } else if (pop_region != NULL) { + // We're not in young mode, and we chose a popular region; don't choose + // any more. 
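
The selection logic above is a greedy, budget-driven loop: start from the predicted base cost, keep a time budget relative to the target (falling back to 50% of the target when less than 10% of it remains), and subtract each chosen region's predicted time from the budget. A stripped-down sketch of that shape; the cost function and all numbers are invented, and the real code treats young and non-young regions differently:

    #include <cstdio>
    #include <vector>

    // Stand-in for predict_region_elapsed_time_ms(); the coefficients are invented.
    double predict_region_ms(int rs_cards) { return 0.2 + rs_cards * 0.001; }

    int main() {
      double target_ms = 50.0;
      double base_ms   = 12.0;                  // predicted RS update + constant overhead
      double remaining = target_ms - base_ms;
      if (remaining < 0.10 * target_ms)         // almost no slack: work with half the target
        remaining = 0.50 * target_ms;

      std::vector<int> candidate_rs_cards = {900, 2500, 400, 7000, 1200};
      double predicted_pause_ms = base_ms;
      int chosen = 0;
      for (int cards : candidate_rs_cards) {
        double cost = predict_region_ms(cards);
        if (remaining < cost) break;            // budget exhausted
        remaining -= cost;
        predicted_pause_ms += cost;
        chosen++;
      }
      std::printf("chose %d regions, predicted pause %.2f ms\n",
                  chosen, predicted_pause_ms);
      return 0;
    }
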
+ return; + } + + if (!in_young_gc_mode() || !full_young_gcs()) { + bool should_continue = true; + NumberSeq seq; + double avg_prediction = 100000000000000000.0; // something very large + do { + hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms, + avg_prediction); + if (hr != NULL && !hr->popular()) { + double predicted_time_ms = predict_region_elapsed_time_ms(hr, false); + time_remaining_ms -= predicted_time_ms; + predicted_pause_time_ms += predicted_time_ms; + add_to_collection_set(hr); + record_cset_region(hr, false); + max_live_bytes -= MIN2(hr->max_live_bytes(), max_live_bytes); + if (G1PolicyVerbose > 0) { + gclog_or_tty->print_cr(" (" SIZE_FORMAT " KB left in heap.)", + max_live_bytes/K); + } + seq.add(predicted_time_ms); + avg_prediction = seq.avg() + seq.sd(); + } + should_continue = + ( hr != NULL) && + ( (adaptive_young_list_length()) ? time_remaining_ms > 0.0 + : _collection_set_size < _young_list_fixed_length ); + } while (should_continue); + + if (!adaptive_young_list_length() && + _collection_set_size < _young_list_fixed_length) + _should_revert_to_full_young_gcs = true; + } + +choose_collection_set_end: + count_CS_bytes_used(); + + end_recording_regions(); + + double non_young_end_time_sec = os::elapsedTime(); + _recorded_non_young_cset_choice_time_ms = + (non_young_end_time_sec - non_young_start_time_sec) * 1000.0; +} + +void G1CollectorPolicy_BestRegionsFirst::record_full_collection_end() { + G1CollectorPolicy::record_full_collection_end(); + _collectionSetChooser->updateAfterFullCollection(); +} + +void G1CollectorPolicy_BestRegionsFirst:: +expand_if_possible(size_t numRegions) { + size_t expansion_bytes = numRegions * HeapRegion::GrainBytes; + _g1->expand(expansion_bytes); +} + +void G1CollectorPolicy_BestRegionsFirst:: +record_collection_pause_end(bool popular, bool abandoned) { + G1CollectorPolicy::record_collection_pause_end(popular, abandoned); + assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end."); +} + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,1199 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// A G1CollectorPolicy makes policy decisions that determine the +// characteristics of the collector. Examples include: +// * choice of collection set. 
+// * when to collect. + +class HeapRegion; +class CollectionSetChooser; + +// Yes, this is a bit unpleasant... but it saves replicating the same thing +// over and over again and introducing subtle problems through small typos and +// cutting and pasting mistakes. The macros below introduces a number +// sequnce into the following two classes and the methods that access it. + +#define define_num_seq(name) \ +private: \ + NumberSeq _all_##name##_times_ms; \ +public: \ + void record_##name##_time_ms(double ms) { \ + _all_##name##_times_ms.add(ms); \ + } \ + NumberSeq* get_##name##_seq() { \ + return &_all_##name##_times_ms; \ + } + +class MainBodySummary; +class PopPreambleSummary; + +class PauseSummary { + define_num_seq(total) + define_num_seq(other) + +public: + virtual MainBodySummary* main_body_summary() { return NULL; } + virtual PopPreambleSummary* pop_preamble_summary() { return NULL; } +}; + +class MainBodySummary { + define_num_seq(satb_drain) // optional + define_num_seq(parallel) // parallel only + define_num_seq(ext_root_scan) + define_num_seq(mark_stack_scan) + define_num_seq(scan_only) + define_num_seq(update_rs) + define_num_seq(scan_rs) + define_num_seq(scan_new_refs) // Only for temp use; added to + // in parallel case. + define_num_seq(obj_copy) + define_num_seq(termination) // parallel only + define_num_seq(parallel_other) // parallel only + define_num_seq(mark_closure) + define_num_seq(clear_ct) // parallel only +}; + +class PopPreambleSummary { + define_num_seq(pop_preamble) + define_num_seq(pop_update_rs) + define_num_seq(pop_scan_rs) + define_num_seq(pop_closure_app) + define_num_seq(pop_evacuation) + define_num_seq(pop_other) +}; + +class NonPopSummary: public PauseSummary, + public MainBodySummary { +public: + virtual MainBodySummary* main_body_summary() { return this; } +}; + +class PopSummary: public PauseSummary, + public MainBodySummary, + public PopPreambleSummary { +public: + virtual MainBodySummary* main_body_summary() { return this; } + virtual PopPreambleSummary* pop_preamble_summary() { return this; } +}; + +class NonPopAbandonedSummary: public PauseSummary { +}; + +class PopAbandonedSummary: public PauseSummary, + public PopPreambleSummary { +public: + virtual PopPreambleSummary* pop_preamble_summary() { return this; } +}; + +class G1CollectorPolicy: public CollectorPolicy { +protected: + // The number of pauses during the execution. + long _n_pauses; + + // either equal to the number of parallel threads, if ParallelGCThreads + // has been set, or 1 otherwise + int _parallel_gc_threads; + + enum SomePrivateConstants { + NumPrevPausesForHeuristics = 10, + NumPrevGCsForHeuristics = 10, + NumAPIs = HeapRegion::MaxAge + }; + + G1MMUTracker* _mmu_tracker; + + void initialize_flags(); + + void initialize_all() { + initialize_flags(); + initialize_size_info(); + initialize_perm_generation(PermGen::MarkSweepCompact); + } + + virtual size_t default_init_heap_size() { + // Pick some reasonable default. 
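
Since define_num_seq is the backbone of the summary classes above, it may help to see it exercised: the sketch below copies the macro's shape and expands it in a toy summary class, with a minimal NumberSeq stand-in (the real NumberSeq lives elsewhere in HotSpot):

    #include <cstdio>
    #include <vector>

    // Tiny stand-in for HotSpot's NumberSeq, just enough to exercise the macro.
    struct NumberSeq {
      std::vector<double> v;
      void add(double ms) { v.push_back(ms); }
      int num() const { return (int)v.size(); }
    };

    // Same shape as the define_num_seq macro in the header above.
    #define define_num_seq(name)                  \
    private:                                      \
      NumberSeq _all_##name##_times_ms;           \
    public:                                       \
      void record_##name##_time_ms(double ms) {   \
        _all_##name##_times_ms.add(ms);           \
      }                                           \
      NumberSeq* get_##name##_seq() {             \
        return &_all_##name##_times_ms;           \
      }

    struct DemoSummary {
      define_num_seq(total)   // generates record_total_time_ms() / get_total_seq()
      define_num_seq(other)   // generates record_other_time_ms() / get_other_seq()
    };

    int main() {
      DemoSummary s;
      s.record_total_time_ms(12.5);
      s.record_other_time_ms(1.5);
      std::printf("total samples recorded: %d\n", s.get_total_seq()->num());
      return 0;
    }
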
+ return 8*M; + } + + + double _cur_collection_start_sec; + size_t _cur_collection_pause_used_at_start_bytes; + size_t _cur_collection_pause_used_regions_at_start; + size_t _prev_collection_pause_used_at_end_bytes; + double _cur_collection_par_time_ms; + double _cur_satb_drain_time_ms; + double _cur_clear_ct_time_ms; + bool _satb_drain_time_set; + double _cur_popular_preamble_start_ms; + double _cur_popular_preamble_time_ms; + double _cur_popular_compute_rc_time_ms; + double _cur_popular_evac_time_ms; + + double _cur_CH_strong_roots_end_sec; + double _cur_CH_strong_roots_dur_ms; + double _cur_G1_strong_roots_end_sec; + double _cur_G1_strong_roots_dur_ms; + + // Statistics for recent GC pauses. See below for how indexed. + TruncatedSeq* _recent_CH_strong_roots_times_ms; + TruncatedSeq* _recent_G1_strong_roots_times_ms; + TruncatedSeq* _recent_evac_times_ms; + // These exclude marking times. + TruncatedSeq* _recent_pause_times_ms; + TruncatedSeq* _recent_gc_times_ms; + + TruncatedSeq* _recent_CS_bytes_used_before; + TruncatedSeq* _recent_CS_bytes_surviving; + + TruncatedSeq* _recent_rs_sizes; + + TruncatedSeq* _concurrent_mark_init_times_ms; + TruncatedSeq* _concurrent_mark_remark_times_ms; + TruncatedSeq* _concurrent_mark_cleanup_times_ms; + + NonPopSummary* _non_pop_summary; + PopSummary* _pop_summary; + NonPopAbandonedSummary* _non_pop_abandoned_summary; + PopAbandonedSummary* _pop_abandoned_summary; + + NumberSeq* _all_pause_times_ms; + NumberSeq* _all_full_gc_times_ms; + double _stop_world_start; + NumberSeq* _all_stop_world_times_ms; + NumberSeq* _all_yield_times_ms; + + size_t _region_num_young; + size_t _region_num_tenured; + size_t _prev_region_num_young; + size_t _prev_region_num_tenured; + + NumberSeq* _all_mod_union_times_ms; + + int _aux_num; + NumberSeq* _all_aux_times_ms; + double* _cur_aux_start_times_ms; + double* _cur_aux_times_ms; + bool* _cur_aux_times_set; + + double* _par_last_ext_root_scan_times_ms; + double* _par_last_mark_stack_scan_times_ms; + double* _par_last_scan_only_times_ms; + double* _par_last_scan_only_regions_scanned; + double* _par_last_update_rs_start_times_ms; + double* _par_last_update_rs_times_ms; + double* _par_last_update_rs_processed_buffers; + double* _par_last_scan_rs_start_times_ms; + double* _par_last_scan_rs_times_ms; + double* _par_last_scan_new_refs_times_ms; + double* _par_last_obj_copy_times_ms; + double* _par_last_termination_times_ms; + + // there are two pases during popular pauses, so we need to store + // somewhere the results of the first pass + double* _pop_par_last_update_rs_start_times_ms; + double* _pop_par_last_update_rs_times_ms; + double* _pop_par_last_update_rs_processed_buffers; + double* _pop_par_last_scan_rs_start_times_ms; + double* _pop_par_last_scan_rs_times_ms; + double* _pop_par_last_closure_app_times_ms; + + double _pop_compute_rc_start; + double _pop_evac_start; + + // indicates that we are in young GC mode + bool _in_young_gc_mode; + + // indicates whether we are in full young or partially young GC mode + bool _full_young_gcs; + + // if true, then it tries to dynamically adjust the length of the + // young list + bool _adaptive_young_list_length; + size_t _young_list_min_length; + size_t _young_list_target_length; + size_t _young_list_so_prefix_length; + size_t _young_list_fixed_length; + + size_t _young_cset_length; + bool _last_young_gc_full; + + double _target_pause_time_ms; + + unsigned _full_young_pause_num; + unsigned _partial_young_pause_num; + + bool _during_marking; + bool _in_marking_window; + bool 
_in_marking_window_im; + + SurvRateGroup* _short_lived_surv_rate_group; + SurvRateGroup* _survivor_surv_rate_group; + // add here any more surv rate groups + + bool during_marking() { + return _during_marking; + } + + // + +private: + enum PredictionConstants { + TruncatedSeqLength = 10 + }; + + TruncatedSeq* _alloc_rate_ms_seq; + double _prev_collection_pause_end_ms; + + TruncatedSeq* _pending_card_diff_seq; + TruncatedSeq* _rs_length_diff_seq; + TruncatedSeq* _cost_per_card_ms_seq; + TruncatedSeq* _cost_per_scan_only_region_ms_seq; + TruncatedSeq* _fully_young_cards_per_entry_ratio_seq; + TruncatedSeq* _partially_young_cards_per_entry_ratio_seq; + TruncatedSeq* _cost_per_entry_ms_seq; + TruncatedSeq* _partially_young_cost_per_entry_ms_seq; + TruncatedSeq* _cost_per_byte_ms_seq; + TruncatedSeq* _constant_other_time_ms_seq; + TruncatedSeq* _young_other_cost_per_region_ms_seq; + TruncatedSeq* _non_young_other_cost_per_region_ms_seq; + + TruncatedSeq* _pending_cards_seq; + TruncatedSeq* _scanned_cards_seq; + TruncatedSeq* _rs_lengths_seq; + + TruncatedSeq* _cost_per_byte_ms_during_cm_seq; + TruncatedSeq* _cost_per_scan_only_region_ms_during_cm_seq; + + TruncatedSeq* _young_gc_eff_seq; + + TruncatedSeq* _max_conc_overhead_seq; + + size_t _recorded_young_regions; + size_t _recorded_scan_only_regions; + size_t _recorded_non_young_regions; + size_t _recorded_region_num; + + size_t _free_regions_at_end_of_collection; + size_t _scan_only_regions_at_end_of_collection; + + size_t _recorded_rs_lengths; + size_t _max_rs_lengths; + + size_t _recorded_marked_bytes; + size_t _recorded_young_bytes; + + size_t _predicted_pending_cards; + size_t _predicted_cards_scanned; + size_t _predicted_rs_lengths; + size_t _predicted_bytes_to_copy; + + double _predicted_survival_ratio; + double _predicted_rs_update_time_ms; + double _predicted_rs_scan_time_ms; + double _predicted_scan_only_scan_time_ms; + double _predicted_object_copy_time_ms; + double _predicted_constant_other_time_ms; + double _predicted_young_other_time_ms; + double _predicted_non_young_other_time_ms; + double _predicted_pause_time_ms; + + double _vtime_diff_ms; + + double _recorded_young_free_cset_time_ms; + double _recorded_non_young_free_cset_time_ms; + + double _sigma; + double _expensive_region_limit_ms; + + size_t _rs_lengths_prediction; + + size_t _known_garbage_bytes; + double _known_garbage_ratio; + + double sigma() { + return _sigma; + } + + // A function that prevents us putting too much stock in small sample + // sets. Returns a number between 2.0 and 1.0, depending on the number + // of samples. 5 or more samples yields one; fewer scales linearly from + // 2.0 at 1 sample to 1.0 at 5. 
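
A worked illustration of the padding that confidence_factor and get_new_prediction (defined just below) apply when few samples are available, using a plain mean and standard deviation in place of TruncatedSeq's decayed statistics and an assumed sigma of 0.5:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <initializer_list>
    #include <vector>

    // Plain mean / standard deviation in place of TruncatedSeq's decaying variants.
    struct Seq {
      std::vector<double> v;
      void add(double d) { v.push_back(d); }
      int num() const { return (int)v.size(); }
      double avg() const {
        double s = 0; for (double d : v) s += d; return s / v.size();
      }
      double sd() const {
        double m = avg(), s = 0;
        for (double d : v) s += (d - m) * (d - m);
        return std::sqrt(s / v.size());
      }
    };

    static const double sigma = 0.5;   // assumed confidence parameter

    double confidence_factor(int samples) {
      if (samples > 4) return 1.0;
      return 1.0 + sigma * (5 - samples) / 2.0;
    }

    double get_new_prediction(const Seq& seq) {
      return std::max(seq.avg() + sigma * seq.sd(),
                      seq.avg() * confidence_factor(seq.num()));
    }

    int main() {
      Seq cost;
      for (double ms : {10.0, 12.0, 9.0, 11.0, 10.5, 10.2}) {
        cost.add(ms);
        std::printf("n=%d  prediction=%.2f ms\n", cost.num(), get_new_prediction(cost));
      }
      return 0;   // the padding above the mean shrinks as samples accumulate
    }
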
+ double confidence_factor(int samples) { + if (samples > 4) return 1.0; + else return 1.0 + sigma() * ((double)(5 - samples))/2.0; + } + + double get_new_neg_prediction(TruncatedSeq* seq) { + return seq->davg() - sigma() * seq->dsd(); + } + +#ifndef PRODUCT + bool verify_young_ages(HeapRegion* head, SurvRateGroup *surv_rate_group); +#endif // PRODUCT + +protected: + double _pause_time_target_ms; + double _recorded_young_cset_choice_time_ms; + double _recorded_non_young_cset_choice_time_ms; + bool _within_target; + size_t _pending_cards; + size_t _max_pending_cards; + +public: + + void set_region_short_lived(HeapRegion* hr) { + hr->install_surv_rate_group(_short_lived_surv_rate_group); + } + + void set_region_survivors(HeapRegion* hr) { + hr->install_surv_rate_group(_survivor_surv_rate_group); + } + +#ifndef PRODUCT + bool verify_young_ages(); +#endif // PRODUCT + + void tag_scan_only(size_t short_lived_scan_only_length); + + double get_new_prediction(TruncatedSeq* seq) { + return MAX2(seq->davg() + sigma() * seq->dsd(), + seq->davg() * confidence_factor(seq->num())); + } + + size_t young_cset_length() { + return _young_cset_length; + } + + void record_max_rs_lengths(size_t rs_lengths) { + _max_rs_lengths = rs_lengths; + } + + size_t predict_pending_card_diff() { + double prediction = get_new_neg_prediction(_pending_card_diff_seq); + if (prediction < 0.00001) + return 0; + else + return (size_t) prediction; + } + + size_t predict_pending_cards() { + size_t max_pending_card_num = _g1->max_pending_card_num(); + size_t diff = predict_pending_card_diff(); + size_t prediction; + if (diff > max_pending_card_num) + prediction = max_pending_card_num; + else + prediction = max_pending_card_num - diff; + + return prediction; + } + + size_t predict_rs_length_diff() { + return (size_t) get_new_prediction(_rs_length_diff_seq); + } + + double predict_alloc_rate_ms() { + return get_new_prediction(_alloc_rate_ms_seq); + } + + double predict_cost_per_card_ms() { + return get_new_prediction(_cost_per_card_ms_seq); + } + + double predict_rs_update_time_ms(size_t pending_cards) { + return (double) pending_cards * predict_cost_per_card_ms(); + } + + double predict_fully_young_cards_per_entry_ratio() { + return get_new_prediction(_fully_young_cards_per_entry_ratio_seq); + } + + double predict_partially_young_cards_per_entry_ratio() { + if (_partially_young_cards_per_entry_ratio_seq->num() < 2) + return predict_fully_young_cards_per_entry_ratio(); + else + return get_new_prediction(_partially_young_cards_per_entry_ratio_seq); + } + + size_t predict_young_card_num(size_t rs_length) { + return (size_t) ((double) rs_length * + predict_fully_young_cards_per_entry_ratio()); + } + + size_t predict_non_young_card_num(size_t rs_length) { + return (size_t) ((double) rs_length * + predict_partially_young_cards_per_entry_ratio()); + } + + double predict_rs_scan_time_ms(size_t card_num) { + if (full_young_gcs()) + return (double) card_num * get_new_prediction(_cost_per_entry_ms_seq); + else + return predict_partially_young_rs_scan_time_ms(card_num); + } + + double predict_partially_young_rs_scan_time_ms(size_t card_num) { + if (_partially_young_cost_per_entry_ms_seq->num() < 3) + return (double) card_num * get_new_prediction(_cost_per_entry_ms_seq); + else + return (double) card_num * + get_new_prediction(_partially_young_cost_per_entry_ms_seq); + } + + double predict_scan_only_time_ms_during_cm(size_t scan_only_region_num) { + if (_cost_per_scan_only_region_ms_during_cm_seq->num() < 3) + return 1.5 * (double) 
scan_only_region_num * + get_new_prediction(_cost_per_scan_only_region_ms_seq); + else + return (double) scan_only_region_num * + get_new_prediction(_cost_per_scan_only_region_ms_during_cm_seq); + } + + double predict_scan_only_time_ms(size_t scan_only_region_num) { + if (_in_marking_window_im) + return predict_scan_only_time_ms_during_cm(scan_only_region_num); + else + return (double) scan_only_region_num * + get_new_prediction(_cost_per_scan_only_region_ms_seq); + } + + double predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) { + if (_cost_per_byte_ms_during_cm_seq->num() < 3) + return 1.1 * (double) bytes_to_copy * + get_new_prediction(_cost_per_byte_ms_seq); + else + return (double) bytes_to_copy * + get_new_prediction(_cost_per_byte_ms_during_cm_seq); + } + + double predict_object_copy_time_ms(size_t bytes_to_copy) { + if (_in_marking_window && !_in_marking_window_im) + return predict_object_copy_time_ms_during_cm(bytes_to_copy); + else + return (double) bytes_to_copy * + get_new_prediction(_cost_per_byte_ms_seq); + } + + double predict_constant_other_time_ms() { + return get_new_prediction(_constant_other_time_ms_seq); + } + + double predict_young_other_time_ms(size_t young_num) { + return + (double) young_num * + get_new_prediction(_young_other_cost_per_region_ms_seq); + } + + double predict_non_young_other_time_ms(size_t non_young_num) { + return + (double) non_young_num * + get_new_prediction(_non_young_other_cost_per_region_ms_seq); + } + + void check_if_region_is_too_expensive(double predicted_time_ms); + + double predict_young_collection_elapsed_time_ms(size_t adjustment); + double predict_base_elapsed_time_ms(size_t pending_cards); + double predict_base_elapsed_time_ms(size_t pending_cards, + size_t scanned_cards); + size_t predict_bytes_to_copy(HeapRegion* hr); + double predict_region_elapsed_time_ms(HeapRegion* hr, bool young); + + // for use by: calculate_optimal_so_length(length) + void predict_gc_eff(size_t young_region_num, + size_t so_length, + double base_time_ms, + double *gc_eff, + double *pause_time_ms); + + // for use by: calculate_young_list_target_config(rs_length) + bool predict_gc_eff(size_t young_region_num, + size_t so_length, + double base_time_with_so_ms, + size_t init_free_regions, + double target_pause_time_ms, + double* gc_eff); + + void start_recording_regions(); + void record_cset_region(HeapRegion* hr, bool young); + void record_scan_only_regions(size_t scan_only_length); + void end_recording_regions(); + + void record_vtime_diff_ms(double vtime_diff_ms) { + _vtime_diff_ms = vtime_diff_ms; + } + + void record_young_free_cset_time_ms(double time_ms) { + _recorded_young_free_cset_time_ms = time_ms; + } + + void record_non_young_free_cset_time_ms(double time_ms) { + _recorded_non_young_free_cset_time_ms = time_ms; + } + + double predict_young_gc_eff() { + return get_new_neg_prediction(_young_gc_eff_seq); + } + + // + +public: + void cset_regions_freed() { + bool propagate = _last_young_gc_full && !_in_marking_window; + _short_lived_surv_rate_group->all_surviving_words_recorded(propagate); + _survivor_surv_rate_group->all_surviving_words_recorded(propagate); + // also call it on any more surv rate groups + } + + void set_known_garbage_bytes(size_t known_garbage_bytes) { + _known_garbage_bytes = known_garbage_bytes; + size_t heap_bytes = _g1->capacity(); + _known_garbage_ratio = (double) _known_garbage_bytes / (double) heap_bytes; + } + + void decrease_known_garbage_bytes(size_t known_garbage_bytes) { + guarantee( _known_garbage_bytes >= 
known_garbage_bytes, "invariant" ); + + _known_garbage_bytes -= known_garbage_bytes; + size_t heap_bytes = _g1->capacity(); + _known_garbage_ratio = (double) _known_garbage_bytes / (double) heap_bytes; + } + + G1MMUTracker* mmu_tracker() { + return _mmu_tracker; + } + + double predict_init_time_ms() { + return get_new_prediction(_concurrent_mark_init_times_ms); + } + + double predict_remark_time_ms() { + return get_new_prediction(_concurrent_mark_remark_times_ms); + } + + double predict_cleanup_time_ms() { + return get_new_prediction(_concurrent_mark_cleanup_times_ms); + } + + // Returns an estimate of the survival rate of the region at yg-age + // "yg_age". + double predict_yg_surv_rate(int age) { + TruncatedSeq* seq = _short_lived_surv_rate_group->get_seq(age); + if (seq->num() == 0) + gclog_or_tty->print("BARF! age is %d", age); + guarantee( seq->num() > 0, "invariant" ); + double pred = get_new_prediction(seq); + if (pred > 1.0) + pred = 1.0; + return pred; + } + + double accum_yg_surv_rate_pred(int age) { + return _short_lived_surv_rate_group->accum_surv_rate_pred(age); + } + +protected: + void print_stats (int level, const char* str, double value); + void print_stats (int level, const char* str, int value); + void print_par_stats (int level, const char* str, double* data) { + print_par_stats(level, str, data, true); + } + void print_par_stats (int level, const char* str, double* data, bool summary); + void print_par_buffers (int level, const char* str, double* data, bool summary); + + void check_other_times(int level, + NumberSeq* other_times_ms, + NumberSeq* calc_other_times_ms) const; + + void print_summary (PauseSummary* stats) const; + void print_abandoned_summary(PauseSummary* non_pop_summary, + PauseSummary* pop_summary) const; + + void print_summary (int level, const char* str, NumberSeq* seq) const; + void print_summary_sd (int level, const char* str, NumberSeq* seq) const; + + double avg_value (double* data); + double max_value (double* data); + double sum_of_values (double* data); + double max_sum (double* data1, double* data2); + + int _last_satb_drain_processed_buffers; + int _last_update_rs_processed_buffers; + double _last_pause_time_ms; + + size_t _bytes_in_to_space_before_gc; + size_t _bytes_in_to_space_after_gc; + size_t bytes_in_to_space_during_gc() { + return + _bytes_in_to_space_after_gc - _bytes_in_to_space_before_gc; + } + size_t _bytes_in_collection_set_before_gc; + // Used to count used bytes in CS. + friend class CountCSClosure; + + // Statistics kept per GC stoppage, pause or full. + TruncatedSeq* _recent_prev_end_times_for_all_gcs_sec; + + // We track markings. + int _num_markings; + double _mark_thread_startup_sec; // Time at startup of marking thread + + // Add a new GC of the given duration and end time to the record. + void update_recent_gc_times(double end_time_sec, double elapsed_ms); + + // The head of the list (via "next_in_collection_set()") representing the + // current collection set. + HeapRegion* _collection_set; + size_t _collection_set_size; + size_t _collection_set_bytes_used_before; + + // Info about marking. + int _n_marks; // Sticky at 2, so we know when we've done at least 2. + + // The number of collection pauses at the end of the last mark. + size_t _n_pauses_at_mark_end; + + // ==== This section is for stats related to starting Conc Refinement on time. + size_t _conc_refine_enabled; + size_t _conc_refine_zero_traversals; + size_t _conc_refine_max_traversals; + // In # of heap regions. 
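
predict_yg_surv_rate above caps a young region's predicted survival rate at 1.0, and predict_bytes_to_copy earlier in this file turns that rate into an expected copy volume (used bytes times the rate for the region's age). A small sketch of that chain; the survival-rate table and region size are invented:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    // Assumed per-age survival-rate predictions (index = age in the surv rate group);
    // in G1 these come from per-age TruncatedSeq history, not a fixed table.
    static const double surv_rate_pred[] = {0.95, 0.60, 0.35, 0.20, 0.10};

    // Mirrors the clamp in predict_yg_surv_rate(): never predict more than 100% survival.
    double predict_yg_surv_rate(int age) {
      return std::min(surv_rate_pred[age], 1.0);
    }

    // Shape of predict_bytes_to_copy() for a young, unmarked region:
    // expected surviving bytes = used bytes * predicted survival rate for its age.
    std::size_t predict_bytes_to_copy(std::size_t used_bytes, int age) {
      return (std::size_t)(used_bytes * predict_yg_surv_rate(age));
    }

    int main() {
      std::size_t region_used = 1024 * 1024;   // 1 MB used in the region (assumed)
      for (int age = 0; age < 5; age++)
        std::printf("age %d -> copy about %zu KB\n",
                    age, predict_bytes_to_copy(region_used, age) / 1024);
      return 0;
    }
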
+ size_t _conc_refine_current_delta; + + // At the beginning of a collection pause, update the variables above, + // especially the "delta". + void update_conc_refine_data(); + // ==== + + // Stash a pointer to the g1 heap. + G1CollectedHeap* _g1; + + // The average time in ms per collection pause, averaged over recent pauses. + double recent_avg_time_for_pauses_ms(); + + // The average time in ms for processing CollectedHeap strong roots, per + // collection pause, averaged over recent pauses. + double recent_avg_time_for_CH_strong_ms(); + + // The average time in ms for processing the G1 remembered set, per + // pause, averaged over recent pauses. + double recent_avg_time_for_G1_strong_ms(); + + // The average time in ms for "evacuating followers", per pause, averaged + // over recent pauses. + double recent_avg_time_for_evac_ms(); + + // The number of "recent" GCs recorded in the number sequences + int number_of_recent_gcs(); + + // The average survival ratio, computed by the total number of bytes + // suriviving / total number of bytes before collection over the last + // several recent pauses. + double recent_avg_survival_fraction(); + // The survival fraction of the most recent pause; if there have been no + // pauses, returns 1.0. + double last_survival_fraction(); + + // Returns a "conservative" estimate of the recent survival rate, i.e., + // one that may be higher than "recent_avg_survival_fraction". + // This is conservative in several ways: + // If there have been few pauses, it will assume a potential high + // variance, and err on the side of caution. + // It puts a lower bound (currently 0.1) on the value it will return. + // To try to detect phase changes, if the most recent pause ("latest") has a + // higher-than average ("avg") survival rate, it returns that rate. + // "work" version is a utility function; young is restricted to young regions. + double conservative_avg_survival_fraction_work(double avg, + double latest); + + // The arguments are the two sequences that keep track of the number of bytes + // surviving and the total number of bytes before collection, resp., + // over the last evereal recent pauses + // Returns the survival rate for the category in the most recent pause. + // If there have been no pauses, returns 1.0. + double last_survival_fraction_work(TruncatedSeq* surviving, + TruncatedSeq* before); + + // The arguments are the two sequences that keep track of the number of bytes + // surviving and the total number of bytes before collection, resp., + // over the last several recent pauses + // Returns the average survival ration over the last several recent pauses + // If there have been no pauses, return 1.0 + double recent_avg_survival_fraction_work(TruncatedSeq* surviving, + TruncatedSeq* before); + + double conservative_avg_survival_fraction() { + double avg = recent_avg_survival_fraction(); + double latest = last_survival_fraction(); + return conservative_avg_survival_fraction_work(avg, latest); + } + + // The ratio of gc time to elapsed time, computed over recent pauses. + double _recent_avg_pause_time_ratio; + + double recent_avg_pause_time_ratio() { + return _recent_avg_pause_time_ratio; + } + + // Number of pauses between concurrent marking. + size_t _pauses_btwn_concurrent_mark; + + size_t _n_marks_since_last_pause; + + // True iff CM has been initiated. 
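
The comments above summarize conservative_avg_survival_fraction_work, defined earlier in this file: start from the recent average, floor it from a small table while fewer than five pauses have been observed, take the latest pause's rate if it is higher, never go below 0.1, and cap at 1.0. A self-contained restatement of that clamping chain:

    #include <algorithm>
    #include <cstdio>

    static const int    survival_min_obs = 5;
    static const double survival_min_obs_limits[] = {0.9, 0.7, 0.5, 0.3, 0.1};
    static const double min_survival_rate = 0.1;

    double conservative_survival(double avg, double latest, int recent_gcs) {
      double res = avg;
      if (recent_gcs < survival_min_obs)               // few samples: assume high variance
        res = std::max(res, survival_min_obs_limits[recent_gcs]);
      res = std::max(res, latest);                     // react to a possible phase change
      res = std::max(res, min_survival_rate);          // lower bound
      return std::min(res, 1.0);                       // LAB waste / evac failure can exceed 1.0
    }

    int main() {
      std::printf("%.2f\n", conservative_survival(0.25, 0.40, 2));  // 0.50 (table floor wins)
      std::printf("%.2f\n", conservative_survival(0.25, 0.40, 8));  // 0.40 (latest wins)
      return 0;
    }
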
+ bool _conc_mark_initiated; + + // True iff CM should be initiated + bool _should_initiate_conc_mark; + bool _should_revert_to_full_young_gcs; + bool _last_full_young_gc; + + // This set of variables tracks the collector efficiency, in order to + // determine whether we should initiate a new marking. + double _cur_mark_stop_world_time_ms; + double _mark_init_start_sec; + double _mark_remark_start_sec; + double _mark_cleanup_start_sec; + double _mark_closure_time_ms; + + void calculate_young_list_min_length(); + void calculate_young_list_target_config(); + void calculate_young_list_target_config(size_t rs_lengths); + size_t calculate_optimal_so_length(size_t young_list_length); + +public: + + G1CollectorPolicy(); + + virtual G1CollectorPolicy* as_g1_policy() { return this; } + + virtual CollectorPolicy::Name kind() { + return CollectorPolicy::G1CollectorPolicyKind; + } + + void check_prediction_validity(); + + size_t bytes_in_collection_set() { + return _bytes_in_collection_set_before_gc; + } + + size_t bytes_in_to_space() { + return bytes_in_to_space_during_gc(); + } + + unsigned calc_gc_alloc_time_stamp() { + return _all_pause_times_ms->num() + 1; + } + +protected: + + // Count the number of bytes used in the CS. + void count_CS_bytes_used(); + + // Together these do the base cleanup-recording work. Subclasses might + // want to put something between them. + void record_concurrent_mark_cleanup_end_work1(size_t freed_bytes, + size_t max_live_bytes); + void record_concurrent_mark_cleanup_end_work2(); + +public: + + virtual void init(); + + virtual HeapWord* mem_allocate_work(size_t size, + bool is_tlab, + bool* gc_overhead_limit_was_exceeded); + + // This method controls how a collector handles one or more + // of its generations being fully allocated. + virtual HeapWord* satisfy_failed_allocation(size_t size, + bool is_tlab); + + BarrierSet::Name barrier_set_name() { return BarrierSet::G1SATBCTLogging; } + + GenRemSet::Name rem_set_name() { return GenRemSet::CardTable; } + + // The number of collection pauses so far. + long n_pauses() const { return _n_pauses; } + + // Update the heuristic info to record a collection pause of the given + // start time, where the given number of bytes were used at the start. + // This may involve changing the desired size of a collection set. + + virtual void record_stop_world_start(); + + virtual void record_collection_pause_start(double start_time_sec, + size_t start_used); + + virtual void record_popular_pause_preamble_start(); + virtual void record_popular_pause_preamble_end(); + + // Must currently be called while the world is stopped. + virtual void record_concurrent_mark_init_start(); + virtual void record_concurrent_mark_init_end(); + void record_concurrent_mark_init_end_pre(double + mark_init_elapsed_time_ms); + + void record_mark_closure_time(double mark_closure_time_ms); + + virtual void record_concurrent_mark_remark_start(); + virtual void record_concurrent_mark_remark_end(); + + virtual void record_concurrent_mark_cleanup_start(); + virtual void record_concurrent_mark_cleanup_end(size_t freed_bytes, + size_t max_live_bytes); + virtual void record_concurrent_mark_cleanup_completed(); + + virtual void record_concurrent_pause(); + virtual void record_concurrent_pause_end(); + + virtual void record_collection_pause_end_CH_strong_roots(); + virtual void record_collection_pause_end_G1_strong_roots(); + + virtual void record_collection_pause_end(bool popular, bool abandoned); + + // Record the fact that a full collection occurred. 
+ virtual void record_full_collection_start(); + virtual void record_full_collection_end(); + + void record_ext_root_scan_time(int worker_i, double ms) { + _par_last_ext_root_scan_times_ms[worker_i] = ms; + } + + void record_mark_stack_scan_time(int worker_i, double ms) { + _par_last_mark_stack_scan_times_ms[worker_i] = ms; + } + + void record_scan_only_time(int worker_i, double ms, int n) { + _par_last_scan_only_times_ms[worker_i] = ms; + _par_last_scan_only_regions_scanned[worker_i] = (double) n; + } + + void record_satb_drain_time(double ms) { + _cur_satb_drain_time_ms = ms; + _satb_drain_time_set = true; + } + + void record_satb_drain_processed_buffers (int processed_buffers) { + _last_satb_drain_processed_buffers = processed_buffers; + } + + void record_mod_union_time(double ms) { + _all_mod_union_times_ms->add(ms); + } + + void record_update_rs_start_time(int thread, double ms) { + _par_last_update_rs_start_times_ms[thread] = ms; + } + + void record_update_rs_time(int thread, double ms) { + _par_last_update_rs_times_ms[thread] = ms; + } + + void record_update_rs_processed_buffers (int thread, + double processed_buffers) { + _par_last_update_rs_processed_buffers[thread] = processed_buffers; + } + + void record_scan_rs_start_time(int thread, double ms) { + _par_last_scan_rs_start_times_ms[thread] = ms; + } + + void record_scan_rs_time(int thread, double ms) { + _par_last_scan_rs_times_ms[thread] = ms; + } + + void record_scan_new_refs_time(int thread, double ms) { + _par_last_scan_new_refs_times_ms[thread] = ms; + } + + double get_scan_new_refs_time(int thread) { + return _par_last_scan_new_refs_times_ms[thread]; + } + + void reset_obj_copy_time(int thread) { + _par_last_obj_copy_times_ms[thread] = 0.0; + } + + void reset_obj_copy_time() { + reset_obj_copy_time(0); + } + + void record_obj_copy_time(int thread, double ms) { + _par_last_obj_copy_times_ms[thread] += ms; + } + + void record_obj_copy_time(double ms) { + record_obj_copy_time(0, ms); + } + + void record_termination_time(int thread, double ms) { + _par_last_termination_times_ms[thread] = ms; + } + + void record_termination_time(double ms) { + record_termination_time(0, ms); + } + + void record_pause_time(double ms) { + _last_pause_time_ms = ms; + } + + void record_clear_ct_time(double ms) { + _cur_clear_ct_time_ms = ms; + } + + void record_par_time(double ms) { + _cur_collection_par_time_ms = ms; + } + + void record_aux_start_time(int i) { + guarantee(i < _aux_num, "should be within range"); + _cur_aux_start_times_ms[i] = os::elapsedTime() * 1000.0; + } + + void record_aux_end_time(int i) { + guarantee(i < _aux_num, "should be within range"); + double ms = os::elapsedTime() * 1000.0 - _cur_aux_start_times_ms[i]; + _cur_aux_times_set[i] = true; + _cur_aux_times_ms[i] += ms; + } + + void record_pop_compute_rc_start(); + void record_pop_compute_rc_end(); + + void record_pop_evac_start(); + void record_pop_evac_end(); + + // Record the fact that "bytes" bytes allocated in a region. + void record_before_bytes(size_t bytes); + void record_after_bytes(size_t bytes); + + // Returns "true" if this is a good time to do a collection pause. + // The "word_size" argument, if non-zero, indicates the size of an + // allocation request that is prompting this query. + virtual bool should_do_collection_pause(size_t word_size) = 0; + + // Choose a new collection set. Marks the chosen regions as being + // "in_collection_set", and links them together. The head and number of + // the collection set are available via access methods. 
+ // If "pop_region" is non-NULL, it is a popular region that has already + // been added to the collection set. + virtual void choose_collection_set(HeapRegion* pop_region = NULL) = 0; + + void clear_collection_set() { _collection_set = NULL; } + + // The head of the list (via "next_in_collection_set()") representing the + // current collection set. + HeapRegion* collection_set() { return _collection_set; } + + // Sets the collection set to the given single region. + virtual void set_single_region_collection_set(HeapRegion* hr); + + // The number of elements in the current collection set. + size_t collection_set_size() { return _collection_set_size; } + + // Add "hr" to the CS. + void add_to_collection_set(HeapRegion* hr); + + bool should_initiate_conc_mark() { return _should_initiate_conc_mark; } + void set_should_initiate_conc_mark() { _should_initiate_conc_mark = true; } + void unset_should_initiate_conc_mark(){ _should_initiate_conc_mark = false; } + + void checkpoint_conc_overhead(); + + // If an expansion would be appropriate, because recent GC overhead had + // exceeded the desired limit, return an amount to expand by. + virtual size_t expansion_amount(); + + // note start of mark thread + void note_start_of_mark_thread(); + + // The marked bytes of "r" have changed; reclassify its desirability + // for marking. Also asserts that "r" is eligible for a CS. + virtual void note_change_in_marked_bytes(HeapRegion* r) = 0; + +#ifndef PRODUCT + // Check any appropriate marked bytes info, asserting false if + // something's wrong, else returning "true". + virtual bool assertMarkedBytesDataOK() = 0; +#endif + + // Print tracing information. + void print_tracing_info() const; + + // Print stats on young survival ratio + void print_yg_surv_rate_info() const; + + void finished_recalculating_age_indexes() { + _short_lived_surv_rate_group->finished_recalculating_age_indexes(); + // do that for any other surv rate groups + } + + bool should_add_next_region_to_young_list(); + + bool in_young_gc_mode() { + return _in_young_gc_mode; + } + void set_in_young_gc_mode(bool in_young_gc_mode) { + _in_young_gc_mode = in_young_gc_mode; + } + + bool full_young_gcs() { + return _full_young_gcs; + } + void set_full_young_gcs(bool full_young_gcs) { + _full_young_gcs = full_young_gcs; + } + + bool adaptive_young_list_length() { + return _adaptive_young_list_length; + } + void set_adaptive_young_list_length(bool adaptive_young_list_length) { + _adaptive_young_list_length = adaptive_young_list_length; + } + + inline double get_gc_eff_factor() { + double ratio = _known_garbage_ratio; + + double square = ratio * ratio; + // square = square * square; + double ret = square * 9.0 + 1.0; +#if 0 + gclog_or_tty->print_cr("ratio = %1.2lf, ret = %1.2lf", ratio, ret); +#endif // 0 + guarantee(0.0 <= ret && ret < 10.0, "invariant!"); + return ret; + } + + // + // Survivor regions policy. + // +protected: + + // Current tenuring threshold, set to 0 if the collector reaches the + // maximum number of survivor regions.
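// (A threshold of 0 therefore routes every evacuated object to a tenured
// allocation region; see evacuation_destination() and
// note_alloc_region_limit_reached() below.)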
+ int _tenuring_threshold; + +public: + + inline GCAllocPurpose + evacuation_destination(HeapRegion* src_region, int age, size_t word_sz) { + if (age < _tenuring_threshold && src_region->is_young()) { + return GCAllocForSurvived; + } else { + return GCAllocForTenured; + } + } + + inline bool track_object_age(GCAllocPurpose purpose) { + return purpose == GCAllocForSurvived; + } + + inline GCAllocPurpose alternative_purpose(int purpose) { + return GCAllocForTenured; + } + + uint max_regions(int purpose); + + // The limit on regions for a particular purpose is reached. + void note_alloc_region_limit_reached(int purpose) { + if (purpose == GCAllocForSurvived) { + _tenuring_threshold = 0; + } + } + + void note_start_adding_survivor_regions() { + _survivor_surv_rate_group->start_adding_regions(); + } + + void note_stop_adding_survivor_regions() { + _survivor_surv_rate_group->stop_adding_regions(); + } +}; + +// This encapsulates a particular strategy for a g1 Collector. +// +// Start a concurrent mark when our heap size is n bytes +// greater than our heap size was at the last concurrent +// mark, where n is a function of the CMSTriggerRatio +// and the MinHeapFreeRatio. +// +// Start a g1 collection pause when we have allocated the +// average number of bytes currently being freed in +// a collection, but only if it is at least one region +// full. +// +// Resize Heap based on desired +// allocation space, where desired allocation space is +// a function of survival rate and desired future to size. +// +// Choose collection set by first picking all older regions +// which have a survival rate that beats our projected young +// survival rate. Then fill out the number of needed regions +// with young regions. + +class G1CollectorPolicy_BestRegionsFirst: public G1CollectorPolicy { + CollectionSetChooser* _collectionSetChooser; + // If the estimate is less than desirable, resize if possible. + void expand_if_possible(size_t numRegions); + + virtual void choose_collection_set(HeapRegion* pop_region = NULL); + virtual void record_collection_pause_start(double start_time_sec, + size_t start_used); + virtual void record_concurrent_mark_cleanup_end(size_t freed_bytes, + size_t max_live_bytes); + virtual void record_full_collection_end(); + +public: + G1CollectorPolicy_BestRegionsFirst() { + _collectionSetChooser = new CollectionSetChooser(); + } + void record_collection_pause_end(bool popular, bool abandoned); + bool should_do_collection_pause(size_t word_size); + virtual void set_single_region_collection_set(HeapRegion* hr); + // This is not needed any more, after the CSet choosing code was + // changed to use the pause prediction work. But let's leave the + // hook in just in case. + void note_change_in_marked_bytes(HeapRegion* r) { } +#ifndef PRODUCT + bool assertMarkedBytesDataOK(); +#endif +}; + +// This should move to some place more general... + +// If we have "n" measurements, and we've kept track of their "sum" and the +// "sum_of_squares" of the measurements, this returns the variance of the +// sequence.
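// Equivalently, variance = sum_of_squares / n - (sum / n)^2, i.e. E[x^2] - E[x]^2.
// A quick illustrative check (made-up values): for the measurements {1.0, 2.0, 3.0}
// we have n = 3, sum = 6.0, sum_of_squares = 14.0, so
//   variance(3, 14.0, 6.0) == (14.0 - 2.0 * 2.0 * 6.0 + 3.0 * 2.0 * 2.0) / 3.0 == 2.0 / 3.0,
// which matches 14.0 / 3.0 - 2.0 * 2.0.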
+inline double variance(int n, double sum_of_squares, double sum) { + double n_d = (double)n; + double avg = sum/n_d; + return (sum_of_squares - 2.0 * avg * sum + n_d * avg * avg) / n_d; +} + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1MMUTracker.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,187 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1MMUTracker.cpp.incl" + +#define _DISABLE_MMU 0 + +// can't rely on comparing doubles with tolerating a small margin for error +#define SMALL_MARGIN 0.0000001 +#define is_double_leq_0(_value) ( (_value) < SMALL_MARGIN ) +#define is_double_leq(_val1, _val2) is_double_leq_0((_val1) - (_val2)) +#define is_double_geq(_val1, _val2) is_double_leq_0((_val2) - (_val1)) + +/***** ALL TIMES ARE IN SECS!!!!!!! *****/ + +G1MMUTracker::G1MMUTracker(double time_slice, double max_gc_time) : + _time_slice(time_slice), + _max_gc_time(max_gc_time), + _conc_overhead_time_sec(0.0) { } + +void +G1MMUTracker::update_conc_overhead(double conc_overhead) { + double conc_overhead_time_sec = _time_slice * conc_overhead; + if (conc_overhead_time_sec > 0.9 * _max_gc_time) { + // We are screwed, as we only seem to have <10% of the soft + // real-time goal available for pauses. Let's admit defeat and + // allow something more generous as a pause target. 
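// (Illustrative numbers only: with a 20 ms time slice, a 10 ms max GC time and a
// measured concurrent overhead of 0.55, conc_overhead_time_sec would be 11 ms,
// which exceeds 0.9 * 10 ms, so it is clamped to 7.5 ms below; max_gc_time()
// then leaves 2.5 ms of the goal for pauses.)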
+ conc_overhead_time_sec = 0.75 * _max_gc_time; + } + + _conc_overhead_time_sec = conc_overhead_time_sec; +} + +G1MMUTrackerQueue::G1MMUTrackerQueue(double time_slice, double max_gc_time) : + G1MMUTracker(time_slice, max_gc_time), + _head_index(0), + _tail_index(trim_index(_head_index+1)), + _no_entries(0) { } + +void G1MMUTrackerQueue::remove_expired_entries(double current_time) { + double limit = current_time - _time_slice; + while (_no_entries > 0) { + if (is_double_geq(limit, _array[_tail_index].end_time())) { + _tail_index = trim_index(_tail_index + 1); + --_no_entries; + } else + return; + } + guarantee(_no_entries == 0, "should have no entries in the array"); +} + +double G1MMUTrackerQueue::calculate_gc_time(double current_time) { + double gc_time = 0.0; + double limit = current_time - _time_slice; + for (int i = 0; i < _no_entries; ++i) { + int index = trim_index(_tail_index + i); + G1MMUTrackerQueueElem *elem = &_array[index]; + if (elem->end_time() > limit) { + if (elem->start_time() > limit) + gc_time += elem->duration(); + else + gc_time += elem->end_time() - limit; + } + } + return gc_time; +} + +void G1MMUTrackerQueue::add_pause(double start, double end, bool gc_thread) { + double longest_allowed = longest_pause_internal(start); + if (longest_allowed < 0.0) + longest_allowed = 0.0; + double duration = end - start; + + remove_expired_entries(end); + if (_no_entries == QueueLength) { + // OK, right now when we fill up we bomb out + // there are a few ways of dealing with this "gracefully" + // increase the array size (:-) + // remove the oldest entry (this might allow more GC time for + // the time slice than what's allowed) + // concolidate the two entries with the minimum gap between them + // (this mighte allow less GC time than what's allowed) + guarantee(0, "array full, currently we can't recover"); + } + _head_index = trim_index(_head_index + 1); + ++_no_entries; + _array[_head_index] = G1MMUTrackerQueueElem(start, end); +} + +// basically the _internal call does not remove expired entries +// this is for trying things out in the future and a couple +// of other places (debugging) + +double G1MMUTrackerQueue::longest_pause(double current_time) { + if (_DISABLE_MMU) + return _max_gc_time; + + MutexLockerEx x(MMUTracker_lock, Mutex::_no_safepoint_check_flag); + remove_expired_entries(current_time); + + return longest_pause_internal(current_time); +} + +double G1MMUTrackerQueue::longest_pause_internal(double current_time) { + double target_time = _max_gc_time; + + while( 1 ) { + double gc_time = + calculate_gc_time(current_time + target_time) + _conc_overhead_time_sec; + double diff = target_time + gc_time - _max_gc_time; + if (!is_double_leq_0(diff)) { + target_time -= diff; + if (is_double_leq_0(target_time)) { + target_time = -1.0; + break; + } + } else { + break; + } + } + + return target_time; +} + +// basically the _internal call does not remove expired entries +// this is for trying things out in the future and a couple +// of other places (debugging) + +double G1MMUTrackerQueue::when_sec(double current_time, double pause_time) { + if (_DISABLE_MMU) + return 0.0; + + MutexLockerEx x(MMUTracker_lock, Mutex::_no_safepoint_check_flag); + remove_expired_entries(current_time); + + return when_internal(current_time, pause_time); +} + +double G1MMUTrackerQueue::when_internal(double current_time, + double pause_time) { + // if the pause is over the maximum, just assume that it's the maximum + double adjusted_pause_time = + (pause_time > max_gc_time()) ? 
max_gc_time() : pause_time; + double earliest_end = current_time + adjusted_pause_time; + double limit = earliest_end - _time_slice; + double gc_time = calculate_gc_time(earliest_end); + double diff = gc_time + adjusted_pause_time - max_gc_time(); + if (is_double_leq_0(diff)) + return 0.0; + + int index = _tail_index; + while ( 1 ) { + G1MMUTrackerQueueElem *elem = &_array[index]; + if (elem->end_time() > limit) { + if (elem->start_time() > limit) + diff -= elem->duration(); + else + diff -= elem->end_time() - limit; + if (is_double_leq_0(diff)) + return elem->end_time() + diff + _time_slice - adjusted_pause_time - current_time; + } + index = trim_index(index+1); + guarantee(index != trim_index(_head_index + 1), "should not go past head"); + } +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1MMUTracker.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,130 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Keeps track of the GC work and decides when it is OK to do GC work +// and for how long so that the MMU invariants are maintained. + +/***** ALL TIMES ARE IN SECS!!!!!!! 
*****/ + +// this is the "interface" +class G1MMUTracker { +protected: + double _time_slice; + double _max_gc_time; // this is per time slice + + double _conc_overhead_time_sec; + +public: + G1MMUTracker(double time_slice, double max_gc_time); + + void update_conc_overhead(double conc_overhead); + + virtual void add_pause(double start, double end, bool gc_thread) = 0; + virtual double longest_pause(double current_time) = 0; + virtual double when_sec(double current_time, double pause_time) = 0; + + double max_gc_time() { + return _max_gc_time - _conc_overhead_time_sec; + } + + inline bool now_max_gc(double current_time) { + return when_sec(current_time, max_gc_time()) < 0.00001; + } + + inline double when_max_gc_sec(double current_time) { + return when_sec(current_time, max_gc_time()); + } + + inline jlong when_max_gc_ms(double current_time) { + double when = when_max_gc_sec(current_time); + return (jlong) (when * 1000.0); + } + + inline jlong when_ms(double current_time, double pause_time) { + double when = when_sec(current_time, pause_time); + return (jlong) (when * 1000.0); + } +}; + +class G1MMUTrackerQueueElem { +private: + double _start_time; + double _end_time; + +public: + inline double start_time() { return _start_time; } + inline double end_time() { return _end_time; } + inline double duration() { return _end_time - _start_time; } + + G1MMUTrackerQueueElem() { + _start_time = 0.0; + _end_time = 0.0; + } + + G1MMUTrackerQueueElem(double start_time, double end_time) { + _start_time = start_time; + _end_time = end_time; + } +}; + +// this is an implementation of the MMUTracker using a (fixed-size) queue +// that keeps track of all the recent pause times +class G1MMUTrackerQueue: public G1MMUTracker { +private: + enum PrivateConstants { + QueueLength = 64 + }; + + // The array keeps track of all the pauses that fall within a time + // slice (the last time slice during which pauses took place). + // The data structure implemented is a circular queue. + // Head "points" to the most recent addition, tail to the oldest one. + // The array is of fixed size and I don't think we'll need more than + // two or three entries with the current behaviour of G1 pauses. + // If the array is full, an easy fix is to look for the pauses with + // the shortest gap between them and concolidate them. + + G1MMUTrackerQueueElem _array[QueueLength]; + int _head_index; + int _tail_index; + int _no_entries; + + inline int trim_index(int index) { + return (index + QueueLength) % QueueLength; + } + + void remove_expired_entries(double current_time); + double calculate_gc_time(double current_time); + + double longest_pause_internal(double current_time); + double when_internal(double current_time, double pause_time); + +public: + G1MMUTrackerQueue(double time_slice, double max_gc_time); + + virtual void add_pause(double start, double end, bool gc_thread); + + virtual double longest_pause(double current_time); + virtual double when_sec(double current_time, double pause_time); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1MarkSweep.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,381 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1MarkSweep.cpp.incl" + +class HeapRegion; + +void G1MarkSweep::invoke_at_safepoint(ReferenceProcessor* rp, + bool clear_all_softrefs) { + assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint"); + + // hook up weak ref data so it can be used during Mark-Sweep + assert(GenMarkSweep::ref_processor() == NULL, "no stomping"); + GenMarkSweep::_ref_processor = rp; + assert(rp != NULL, "should be non-NULL"); + + // When collecting the permanent generation methodOops may be moving, + // so we either have to flush all bcp data or convert it into bci. + CodeCache::gc_prologue(); + Threads::gc_prologue(); + + // Increment the invocation count for the permanent generation, since it is + // implicitly collected whenever we do a full mark sweep collection. + SharedHeap* sh = SharedHeap::heap(); + sh->perm_gen()->stat_record()->invocations++; + + bool marked_for_unloading = false; + + allocate_stacks(); + + mark_sweep_phase1(marked_for_unloading, clear_all_softrefs); + + if (G1VerifyConcMark) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + g1h->checkConcurrentMark(); + } + + mark_sweep_phase2(); + + // Don't add any more derived pointers during phase3 + COMPILER2_PRESENT(DerivedPointerTable::set_active(false)); + + mark_sweep_phase3(); + + mark_sweep_phase4(); + + GenMarkSweep::restore_marks(); + + GenMarkSweep::deallocate_stacks(); + + // We must invalidate the perm-gen rs, so that it gets rebuilt. + GenRemSet* rs = sh->rem_set(); + rs->invalidate(sh->perm_gen()->used_region(), true /*whole_heap*/); + + // "free at last gc" is calculated from these. + // CHF: cheating for now!!! 
+ // Universe::set_heap_capacity_at_last_gc(Universe::heap()->capacity()); + // Universe::set_heap_used_at_last_gc(Universe::heap()->used()); + + Threads::gc_epilogue(); + CodeCache::gc_epilogue(); + + // refs processing: clean slate + GenMarkSweep::_ref_processor = NULL; +} + + +void G1MarkSweep::allocate_stacks() { + GenMarkSweep::_preserved_count_max = 0; + GenMarkSweep::_preserved_marks = NULL; + GenMarkSweep::_preserved_count = 0; + GenMarkSweep::_preserved_mark_stack = NULL; + GenMarkSweep::_preserved_oop_stack = NULL; + + GenMarkSweep::_marking_stack = + new (ResourceObj::C_HEAP) GrowableArray(4000, true); + + size_t size = SystemDictionary::number_of_classes() * 2; + GenMarkSweep::_revisit_klass_stack = + new (ResourceObj::C_HEAP) GrowableArray((int)size, true); +} + +void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading, + bool clear_all_softrefs) { + // Recursively traverse all live objects and mark them + EventMark m("1 mark object"); + TraceTime tm("phase 1", PrintGC && Verbose, true, gclog_or_tty); + GenMarkSweep::trace(" 1"); + + SharedHeap* sh = SharedHeap::heap(); + + sh->process_strong_roots(true, // Collecting permanent generation. + SharedHeap::SO_SystemClasses, + &GenMarkSweep::follow_root_closure, + &GenMarkSweep::follow_root_closure); + + // Process reference objects found during marking + ReferencePolicy *soft_ref_policy; + if (clear_all_softrefs) { + soft_ref_policy = new AlwaysClearPolicy(); + } else { +#ifdef COMPILER2 + soft_ref_policy = new LRUMaxHeapPolicy(); +#else + soft_ref_policy = new LRUCurrentHeapPolicy(); +#endif + } + assert(soft_ref_policy != NULL,"No soft reference policy"); + GenMarkSweep::ref_processor()->process_discovered_references( + soft_ref_policy, + &GenMarkSweep::is_alive, + &GenMarkSweep::keep_alive, + &GenMarkSweep::follow_stack_closure, + NULL); + + // Follow system dictionary roots and unload classes + bool purged_class = SystemDictionary::do_unloading(&GenMarkSweep::is_alive); + assert(GenMarkSweep::_marking_stack->is_empty(), + "stack should be empty by now"); + + // Follow code cache roots (has to be done after system dictionary, + // assumes all live klasses are marked) + CodeCache::do_unloading(&GenMarkSweep::is_alive, + &GenMarkSweep::keep_alive, + purged_class); + GenMarkSweep::follow_stack(); + + // Update subklass/sibling/implementor links of live klasses + GenMarkSweep::follow_weak_klass_links(); + assert(GenMarkSweep::_marking_stack->is_empty(), + "stack should be empty by now"); + + // Visit symbol and interned string tables and delete unmarked oops + SymbolTable::unlink(&GenMarkSweep::is_alive); + StringTable::unlink(&GenMarkSweep::is_alive); + + assert(GenMarkSweep::_marking_stack->is_empty(), + "stack should be empty by now"); +} + +class G1PrepareCompactClosure: public HeapRegionClosure { + ModRefBarrierSet* _mrbs; + CompactPoint _cp; + bool _popular_only; + + void free_humongous_region(HeapRegion* hr) { + HeapWord* bot = hr->bottom(); + HeapWord* end = hr->end(); + assert(hr->startsHumongous(), + "Only the start of a humongous region should be freed."); + G1CollectedHeap::heap()->free_region(hr); + hr->prepare_for_compaction(&_cp); + // Also clear the part of the card table that will be unused after + // compaction. 
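// (Everything between compaction_top() and end() will be free space once the
// region has been compacted, so any dirty cards covering that range can simply
// be discarded.)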
+ _mrbs->clear(MemRegion(hr->compaction_top(), hr->end())); + } + +public: + G1PrepareCompactClosure(CompactibleSpace* cs, bool popular_only) : + _cp(NULL, cs, cs->initialize_threshold()), + _mrbs(G1CollectedHeap::heap()->mr_bs()), + _popular_only(popular_only) + {} + bool doHeapRegion(HeapRegion* hr) { + if (_popular_only && !hr->popular()) + return true; // terminate early + else if (!_popular_only && hr->popular()) + return false; // skip this one. + + if (hr->isHumongous()) { + if (hr->startsHumongous()) { + oop obj = oop(hr->bottom()); + if (obj->is_gc_marked()) { + obj->forward_to(obj); + } else { + free_humongous_region(hr); + } + } else { + assert(hr->continuesHumongous(), "Invalid humongous."); + } + } else { + hr->prepare_for_compaction(&_cp); + // Also clear the part of the card table that will be unused after + // compaction. + _mrbs->clear(MemRegion(hr->compaction_top(), hr->end())); + } + return false; + } +}; +// Stolen verbatim from g1CollectedHeap.cpp +class FindFirstRegionClosure: public HeapRegionClosure { + HeapRegion* _a_region; + bool _find_popular; +public: + FindFirstRegionClosure(bool find_popular) : + _a_region(NULL), _find_popular(find_popular) {} + bool doHeapRegion(HeapRegion* r) { + if (r->popular() == _find_popular) { + _a_region = r; + return true; + } else { + return false; + } + } + HeapRegion* result() { return _a_region; } +}; + +void G1MarkSweep::mark_sweep_phase2() { + // Now all live objects are marked, compute the new object addresses. + + // It is imperative that we traverse perm_gen LAST. If dead space is + // allowed a range of dead object may get overwritten by a dead int + // array. If perm_gen is not traversed last a klassOop may get + // overwritten. This is fine since it is dead, but if the class has dead + // instances we have to skip them, and in order to find their size we + // need the klassOop! + // + // It is not required that we traverse spaces in the same order in + // phase2, phase3 and phase4, but the ValidateMarkSweep live oops + // tracking expects us to do so. See comment under phase4. + + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + Generation* pg = g1h->perm_gen(); + + EventMark m("2 compute new addresses"); + TraceTime tm("phase 2", PrintGC && Verbose, true, gclog_or_tty); + GenMarkSweep::trace("2"); + + // First we compact the popular regions. + if (G1NumPopularRegions > 0) { + CompactibleSpace* sp = g1h->first_compactible_space(); + FindFirstRegionClosure cl(true /*find_popular*/); + g1h->heap_region_iterate(&cl); + HeapRegion *r = cl.result(); + assert(r->popular(), "should have found a popular region."); + assert(r == sp, "first popular heap region should " + "== first compactible space"); + G1PrepareCompactClosure blk(sp, true/*popular_only*/); + g1h->heap_region_iterate(&blk); + } + + // Now we do the regular regions. 
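// (The prepare-compact closure below is reused with popular_only == false, so the
// popular regions already compacted above are skipped on this pass.)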
+ FindFirstRegionClosure cl(false /*find_popular*/); + g1h->heap_region_iterate(&cl); + HeapRegion *r = cl.result(); + assert(!r->popular(), "should have founda non-popular region."); + CompactibleSpace* sp = r; + if (r->isHumongous() && oop(r->bottom())->is_gc_marked()) { + sp = r->next_compaction_space(); + } + + G1PrepareCompactClosure blk(sp, false/*popular_only*/); + g1h->heap_region_iterate(&blk); + + CompactPoint perm_cp(pg, NULL, NULL); + pg->prepare_for_compaction(&perm_cp); +} + +class G1AdjustPointersClosure: public HeapRegionClosure { + public: + bool doHeapRegion(HeapRegion* r) { + if (r->isHumongous()) { + if (r->startsHumongous()) { + // We must adjust the pointers on the single H object. + oop obj = oop(r->bottom()); + debug_only(GenMarkSweep::track_interior_pointers(obj)); + // point all the oops to the new location + obj->adjust_pointers(); + debug_only(GenMarkSweep::check_interior_pointers()); + } + } else { + // This really ought to be "as_CompactibleSpace"... + r->adjust_pointers(); + } + return false; + } +}; + +void G1MarkSweep::mark_sweep_phase3() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + Generation* pg = g1h->perm_gen(); + + // Adjust the pointers to reflect the new locations + EventMark m("3 adjust pointers"); + TraceTime tm("phase 3", PrintGC && Verbose, true, gclog_or_tty); + GenMarkSweep::trace("3"); + + SharedHeap* sh = SharedHeap::heap(); + + sh->process_strong_roots(true, // Collecting permanent generation. + SharedHeap::SO_AllClasses, + &GenMarkSweep::adjust_root_pointer_closure, + &GenMarkSweep::adjust_pointer_closure); + + g1h->ref_processor()->weak_oops_do(&GenMarkSweep::adjust_root_pointer_closure); + + // Now adjust pointers in remaining weak roots. (All of which should + // have been cleared if they pointed to non-surviving objects.) + g1h->g1_process_weak_roots(&GenMarkSweep::adjust_root_pointer_closure, + &GenMarkSweep::adjust_pointer_closure); + + GenMarkSweep::adjust_marks(); + + G1AdjustPointersClosure blk; + g1h->heap_region_iterate(&blk); + pg->adjust_pointers(); +} + +class G1SpaceCompactClosure: public HeapRegionClosure { +public: + G1SpaceCompactClosure() {} + + bool doHeapRegion(HeapRegion* hr) { + if (hr->isHumongous()) { + if (hr->startsHumongous()) { + oop obj = oop(hr->bottom()); + if (obj->is_gc_marked()) { + obj->init_mark(); + } else { + assert(hr->is_empty(), "Should have been cleared in phase 2."); + } + hr->reset_during_compaction(); + } + } else { + hr->compact(); + } + return false; + } +}; + +void G1MarkSweep::mark_sweep_phase4() { + // All pointers are now adjusted, move objects accordingly + + // It is imperative that we traverse perm_gen first in phase4. All + // classes must be allocated earlier than their instances, and traversing + // perm_gen first makes sure that all klassOops have moved to their new + // location before any instance does a dispatch through it's klass! + + // The ValidateMarkSweep live oops tracking expects us to traverse spaces + // in the same order in phase2, phase3 and phase4. We don't quite do that + // here (perm_gen first rather than last), so we tell the validate code + // to use a higher index (saved from phase2) when verifying perm_gen. 
+ G1CollectedHeap* g1h = G1CollectedHeap::heap(); + Generation* pg = g1h->perm_gen(); + + EventMark m("4 compact heap"); + TraceTime tm("phase 4", PrintGC && Verbose, true, gclog_or_tty); + GenMarkSweep::trace("4"); + + pg->compact(); + + G1SpaceCompactClosure blk; + g1h->heap_region_iterate(&blk); + +} + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1MarkSweep.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,57 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class ReferenceProcessor; + +// G1MarkSweep takes care of global mark-compact garbage collection for a +// G1CollectedHeap using a four-phase pointer forwarding algorithm. All +// generations are assumed to support marking; those that can also support +// compaction. +// +// Class unloading will only occur when a full gc is invoked. + + +class G1MarkSweep : AllStatic { + friend class VM_G1MarkSweep; + friend class Scavenge; + + public: + + static void invoke_at_safepoint(ReferenceProcessor* rp, + bool clear_all_softrefs); + + private: + + // Mark live objects + static void mark_sweep_phase1(bool& marked_for_deopt, + bool clear_all_softrefs); + // Calculate new addresses + static void mark_sweep_phase2(); + // Update pointers + static void mark_sweep_phase3(); + // Move objects to new positions + static void mark_sweep_phase4(); + + static void allocate_stacks(); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1OopClosures.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,202 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class HeapRegion; +class G1CollectedHeap; +class G1RemSet; +class HRInto_G1RemSet; +class G1RemSet; +class ConcurrentMark; +class DirtyCardToOopClosure; +class CMBitMap; +class CMMarkStack; +class G1ParScanThreadState; + +// A class that scans oops in a given heap region (much as OopsInGenClosure +// scans oops in a generation.) +class OopsInHeapRegionClosure: public OopsInGenClosure { +protected: + HeapRegion* _from; +public: + virtual void set_region(HeapRegion* from) { _from = from; } +}; + + +class G1ScanAndBalanceClosure : public OopClosure { + G1CollectedHeap* _g1; + static int _nq; +public: + G1ScanAndBalanceClosure(G1CollectedHeap* g1) : _g1(g1) { } + inline void do_oop_nv(oop* p); + inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); } + virtual void do_oop(oop* p); + virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); } +}; + +class G1ParClosureSuper : public OopsInHeapRegionClosure { +protected: + G1CollectedHeap* _g1; + G1RemSet* _g1_rem; + ConcurrentMark* _cm; + G1ParScanThreadState* _par_scan_state; +public: + G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state); + bool apply_to_weak_ref_discovered_field() { return true; } +}; + +class G1ParScanClosure : public G1ParClosureSuper { +public: + G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : + G1ParClosureSuper(g1, par_scan_state) { } + void do_oop_nv(oop* p); // should be made inline + inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); } + virtual void do_oop(oop* p) { do_oop_nv(p); } + virtual void do_oop(narrowOop* p) { do_oop_nv(p); } +}; + +#define G1_PARTIAL_ARRAY_MASK 1 + +class G1ParScanPartialArrayClosure : public G1ParClosureSuper { + G1ParScanClosure _scanner; + template void process_array_chunk(oop obj, int start, int end); +public: + G1ParScanPartialArrayClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : + G1ParClosureSuper(g1, par_scan_state), _scanner(g1, par_scan_state) { } + void do_oop_nv(oop* p); + void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); } + virtual void do_oop(oop* p) { do_oop_nv(p); } + virtual void do_oop(narrowOop* p) { do_oop_nv(p); } +}; + + +class G1ParCopyHelper : public G1ParClosureSuper { + G1ParScanClosure *_scanner; +protected: + void mark_forwardee(oop* p); + oop copy_to_survivor_space(oop obj); +public: + G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state, + G1ParScanClosure *scanner) : + G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { } +}; + +template +class G1ParCopyClosure : public G1ParCopyHelper { + G1ParScanClosure _scanner; + void do_oop_work(oop* p); + void do_oop_work(narrowOop* p) { guarantee(false, "NYI"); } +public: + G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : + _scanner(g1, par_scan_state), G1ParCopyHelper(g1, par_scan_state, &_scanner) { } + inline void do_oop_nv(oop* p) { + do_oop_work(p); + if (do_mark_forwardee) + mark_forwardee(p); + } + inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); } + virtual void do_oop(oop* p) { do_oop_nv(p); } + virtual void do_oop(narrowOop* p) 
{ do_oop_nv(p); } +}; + +typedef G1ParCopyClosure G1ParScanExtRootClosure; +typedef G1ParCopyClosure G1ParScanPermClosure; +typedef G1ParCopyClosure G1ParScanAndMarkExtRootClosure; +typedef G1ParCopyClosure G1ParScanAndMarkPermClosure; +typedef G1ParCopyClosure G1ParScanHeapRSClosure; +typedef G1ParCopyClosure G1ParScanAndMarkHeapRSClosure; +typedef G1ParCopyClosure G1ParScanHeapEvacClosure; + + +class FilterIntoCSClosure: public OopClosure { + G1CollectedHeap* _g1; + OopClosure* _oc; + DirtyCardToOopClosure* _dcto_cl; +public: + FilterIntoCSClosure( DirtyCardToOopClosure* dcto_cl, + G1CollectedHeap* g1, OopClosure* oc) : + _dcto_cl(dcto_cl), _g1(g1), _oc(oc) + {} + inline void do_oop_nv(oop* p); + inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); } + virtual void do_oop(oop* p); + virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); } + bool apply_to_weak_ref_discovered_field() { return true; } + bool do_header() { return false; } +}; + +class FilterInHeapRegionAndIntoCSClosure : public OopsInHeapRegionClosure { + G1CollectedHeap* _g1; + OopsInHeapRegionClosure* _oc; +public: + FilterInHeapRegionAndIntoCSClosure(G1CollectedHeap* g1, + OopsInHeapRegionClosure* oc) : + _g1(g1), _oc(oc) + {} + inline void do_oop_nv(oop* p); + inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); } + virtual void do_oop(oop* p); + virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); } + bool apply_to_weak_ref_discovered_field() { return true; } + bool do_header() { return false; } + void set_region(HeapRegion* from) { + _oc->set_region(from); + } +}; + +class FilterAndMarkInHeapRegionAndIntoCSClosure : public OopsInHeapRegionClosure { + G1CollectedHeap* _g1; + ConcurrentMark* _cm; + OopsInHeapRegionClosure* _oc; +public: + FilterAndMarkInHeapRegionAndIntoCSClosure(G1CollectedHeap* g1, + OopsInHeapRegionClosure* oc, + ConcurrentMark* cm) + : _g1(g1), _oc(oc), _cm(cm) { } + + inline void do_oop_nv(oop* p); + inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); } + virtual void do_oop(oop* p); + virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); } + bool apply_to_weak_ref_discovered_field() { return true; } + bool do_header() { return false; } + void set_region(HeapRegion* from) { + _oc->set_region(from); + } +}; + +class FilterOutOfRegionClosure: public OopClosure { + HeapWord* _r_bottom; + HeapWord* _r_end; + OopClosure* _oc; + int _out_of_region; +public: + FilterOutOfRegionClosure(HeapRegion* r, OopClosure* oc); + inline void do_oop_nv(oop* p); + inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); } + virtual void do_oop(oop* p); + virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); } + bool apply_to_weak_ref_discovered_field() { return true; } + bool do_header() { return false; } + int out_of_region() { return _out_of_region; } +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,112 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +/* + * This really ought to be an inline function, but apparently the C++ + * compiler sometimes sees fit to ignore inline declarations. Sigh. + */ + +// This must a ifdef'ed because the counting it controls is in a +// perf-critical inner loop. +#define FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT 0 + +inline void FilterIntoCSClosure::do_oop_nv(oop* p) { + oop obj = *p; + if (obj != NULL && _g1->obj_in_cs(obj)) { + _oc->do_oop(p); +#if FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT + _dcto_cl->incr_count(); +#endif + } +} + +inline void FilterIntoCSClosure::do_oop(oop* p) +{ + do_oop_nv(p); +} + +#define FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT 0 + +inline void FilterOutOfRegionClosure::do_oop_nv(oop* p) { + oop obj = *p; + HeapWord* obj_hw = (HeapWord*)obj; + if (obj_hw != NULL && (obj_hw < _r_bottom || obj_hw >= _r_end)) { + _oc->do_oop(p); +#if FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT + _out_of_region++; +#endif + } +} + +inline void FilterOutOfRegionClosure::do_oop(oop* p) +{ + do_oop_nv(p); +} + +inline void FilterInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) { + oop obj = *p; + if (obj != NULL && _g1->obj_in_cs(obj)) + _oc->do_oop(p); +} + +inline void FilterInHeapRegionAndIntoCSClosure::do_oop(oop* p) +{ + do_oop_nv(p); +} + + +inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) { + oop obj = *p; + if (obj != NULL) { + HeapRegion* hr = _g1->heap_region_containing((HeapWord*) obj); + if (hr != NULL) { + if (hr->in_collection_set()) + _oc->do_oop(p); + else if (!hr->is_young()) + _cm->grayRoot(obj); + } + } +} + +inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop(oop* p) +{ + do_oop_nv(p); +} + +inline void G1ScanAndBalanceClosure::do_oop_nv(oop* p) { + RefToScanQueue* q; + if (ParallelGCThreads > 0) { + // Deal the work out equally. + _nq = (_nq + 1) % ParallelGCThreads; + q = _g1->task_queue(_nq); + } else { + q = _g1->task_queue(0); + } + bool nooverflow = q->push(p); + guarantee(nooverflow, "Overflow during poplularity region processing"); +} + +inline void G1ScanAndBalanceClosure::do_oop(oop* p) { + do_oop_nv(p); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1RemSet.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,1003 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1RemSet.cpp.incl" + +#define CARD_REPEAT_HISTO 0 + +#if CARD_REPEAT_HISTO +static size_t ct_freq_sz; +static jbyte* ct_freq = NULL; + +void init_ct_freq_table(size_t heap_sz_bytes) { + if (ct_freq == NULL) { + ct_freq_sz = heap_sz_bytes/CardTableModRefBS::card_size; + ct_freq = new jbyte[ct_freq_sz]; + for (size_t j = 0; j < ct_freq_sz; j++) ct_freq[j] = 0; + } +} + +void ct_freq_note_card(size_t index) { + assert(0 <= index && index < ct_freq_sz, "Bounds error."); + if (ct_freq[index] < 100) { ct_freq[index]++; } +} + +static IntHistogram card_repeat_count(10, 10); + +void ct_freq_update_histo_and_reset() { + for (size_t j = 0; j < ct_freq_sz; j++) { + card_repeat_count.add_entry(ct_freq[j]); + ct_freq[j] = 0; + } + +} +#endif + + +class IntoCSOopClosure: public OopsInHeapRegionClosure { + OopsInHeapRegionClosure* _blk; + G1CollectedHeap* _g1; +public: + IntoCSOopClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) : + _g1(g1), _blk(blk) {} + void set_region(HeapRegion* from) { + _blk->set_region(from); + } + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + oop obj = *p; + if (_g1->obj_in_cs(obj)) _blk->do_oop(p); + } + bool apply_to_weak_ref_discovered_field() { return true; } + bool idempotent() { return true; } +}; + +class IntoCSRegionClosure: public HeapRegionClosure { + IntoCSOopClosure _blk; + G1CollectedHeap* _g1; +public: + IntoCSRegionClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) : + _g1(g1), _blk(g1, blk) {} + bool doHeapRegion(HeapRegion* r) { + if (!r->in_collection_set()) { + _blk.set_region(r); + if (r->isHumongous()) { + if (r->startsHumongous()) { + oop obj = oop(r->bottom()); + obj->oop_iterate(&_blk); + } + } else { + r->oop_before_save_marks_iterate(&_blk); + } + } + return false; + } +}; + +void +StupidG1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, + int worker_i) { + IntoCSRegionClosure rc(_g1, oc); + _g1->heap_region_iterate(&rc); +} + +class UpdateRSOopClosure: public OopClosure { + HeapRegion* _from; + HRInto_G1RemSet* _rs; + int _worker_i; +public: + UpdateRSOopClosure(HRInto_G1RemSet* rs, int worker_i = 0) : + _from(NULL), _rs(rs), _worker_i(worker_i) { + guarantee(_rs != NULL, "Requires an HRIntoG1RemSet"); + } + + void set_from(HeapRegion* from) { + assert(from != NULL, "from region must be non-NULL"); + _from = from; + } + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + assert(_from != NULL, "from region must be non-NULL"); + _rs->par_write_ref(_from, p, _worker_i); + } + // Override: this closure is idempotent. 
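// (That is, applying it more than once to the same reference should be harmless:
// par_write_ref would just record the same reference in the remembered set again.)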
+ // bool idempotent() { return true; } + bool apply_to_weak_ref_discovered_field() { return true; } +}; + +class UpdateRSOutOfRegionClosure: public HeapRegionClosure { + G1CollectedHeap* _g1h; + ModRefBarrierSet* _mr_bs; + UpdateRSOopClosure _cl; + int _worker_i; +public: + UpdateRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) : + _cl(g1->g1_rem_set()->as_HRInto_G1RemSet(), worker_i), + _mr_bs(g1->mr_bs()), + _worker_i(worker_i), + _g1h(g1) + {} + bool doHeapRegion(HeapRegion* r) { + if (!r->in_collection_set() && !r->continuesHumongous()) { + _cl.set_from(r); + r->set_next_filter_kind(HeapRegionDCTOC::OutOfRegionFilterKind); + _mr_bs->mod_oop_in_space_iterate(r, &_cl, true, true); + } + return false; + } +}; + +class VerifyRSCleanCardOopClosure: public OopClosure { + G1CollectedHeap* _g1; +public: + VerifyRSCleanCardOopClosure(G1CollectedHeap* g1) : _g1(g1) {} + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + oop obj = *p; + HeapRegion* to = _g1->heap_region_containing(obj); + guarantee(to == NULL || !to->in_collection_set(), + "Missed a rem set member."); + } +}; + +HRInto_G1RemSet::HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs) + : G1RemSet(g1), _ct_bs(ct_bs), _g1p(_g1->g1_policy()), + _cg1r(g1->concurrent_g1_refine()), + _par_traversal_in_progress(false), _new_refs(NULL), + _cards_scanned(NULL), _total_cards_scanned(0) +{ + _seq_task = new SubTasksDone(NumSeqTasks); + _new_refs = NEW_C_HEAP_ARRAY(GrowableArray*, ParallelGCThreads); +} + +HRInto_G1RemSet::~HRInto_G1RemSet() { + delete _seq_task; +} + +void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) { + if (_g1->is_in_g1_reserved(mr.start())) { + _n += (int) ((mr.byte_size() / CardTableModRefBS::card_size)); + if (_start_first == NULL) _start_first = mr.start(); + } +} + +class ScanRSClosure : public HeapRegionClosure { + size_t _cards_done, _cards; + G1CollectedHeap* _g1h; + OopsInHeapRegionClosure* _oc; + G1BlockOffsetSharedArray* _bot_shared; + CardTableModRefBS *_ct_bs; + int _worker_i; + bool _try_claimed; +public: + ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) : + _oc(oc), + _cards(0), + _cards_done(0), + _worker_i(worker_i), + _try_claimed(false) + { + _g1h = G1CollectedHeap::heap(); + _bot_shared = _g1h->bot_shared(); + _ct_bs = (CardTableModRefBS*) (_g1h->barrier_set()); + } + + void set_try_claimed() { _try_claimed = true; } + + void scanCard(size_t index, HeapRegion *r) { + _cards_done++; + DirtyCardToOopClosure* cl = + r->new_dcto_closure(_oc, + CardTableModRefBS::Precise, + HeapRegionDCTOC::IntoCSFilterKind); + + // Set the "from" region in the closure. + _oc->set_region(r); + HeapWord* card_start = _bot_shared->address_for_index(index); + HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words; + Space *sp = SharedHeap::heap()->space_containing(card_start); + MemRegion sm_region; + if (ParallelGCThreads > 0) { + // first find the used area + sm_region = sp->used_region_at_save_marks(); + } else { + // The closure is not idempotent. We shouldn't look at objects + // allocated during the GC. 
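// used_region_at_save_marks() only covers the space up to the saved-marks
// watermark, so objects allocated here during the current GC fall outside the
// region being scanned.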
+ sm_region = sp->used_region_at_save_marks(); + } + MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end)); + if (!mr.is_empty()) { + cl->do_MemRegion(mr); + } + } + + void printCard(HeapRegion* card_region, size_t card_index, + HeapWord* card_start) { + gclog_or_tty->print_cr("T %d Region [" PTR_FORMAT ", " PTR_FORMAT ") " + "RS names card %p: " + "[" PTR_FORMAT ", " PTR_FORMAT ")", + _worker_i, + card_region->bottom(), card_region->end(), + card_index, + card_start, card_start + G1BlockOffsetSharedArray::N_words); + } + + bool doHeapRegion(HeapRegion* r) { + assert(r->in_collection_set(), "should only be called on elements of CS."); + HeapRegionRemSet* hrrs = r->rem_set(); + if (hrrs->iter_is_complete()) return false; // All done. + if (!_try_claimed && !hrrs->claim_iter()) return false; + // If we didn't return above, then + // _try_claimed || r->claim_iter() + // is true: either we're supposed to work on claimed-but-not-complete + // regions, or we successfully claimed the region. + HeapRegionRemSetIterator* iter = _g1h->rem_set_iterator(_worker_i); + hrrs->init_iterator(iter); + size_t card_index; + while (iter->has_next(card_index)) { + HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index); + +#if 0 + gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n", + card_start, card_start + CardTableModRefBS::card_size_in_words); +#endif + + HeapRegion* card_region = _g1h->heap_region_containing(card_start); + assert(card_region != NULL, "Yielding cards not in the heap?"); + _cards++; + + if (!card_region->in_collection_set()) { + // If the card is dirty, then we will scan it during updateRS. + if (!_ct_bs->is_card_claimed(card_index) && + !_ct_bs->is_card_dirty(card_index)) { + assert(_ct_bs->is_card_clean(card_index) || + _ct_bs->is_card_claimed(card_index), + "Card is either dirty, clean, or claimed"); + if (_ct_bs->claim_card(card_index)) + scanCard(card_index, card_region); + } + } + } + hrrs->set_iter_complete(); + return false; + } + // Set all cards back to clean. + void cleanup() {_g1h->cleanUpCardTable();} + size_t cards_done() { return _cards_done;} + size_t cards_looked_up() { return _cards;} +}; + +// We want the parallel threads to start their scanning at +// different collection set regions to avoid contention. 
+// If we have: +// n collection set regions +// p threads +// Then thread t will start at region t * floor (n/p) + +HeapRegion* HRInto_G1RemSet::calculateStartRegion(int worker_i) { + HeapRegion* result = _g1p->collection_set(); + if (ParallelGCThreads > 0) { + size_t cs_size = _g1p->collection_set_size(); + int n_workers = _g1->workers()->total_workers(); + size_t cs_spans = cs_size / n_workers; + size_t ind = cs_spans * worker_i; + for (size_t i = 0; i < ind; i++) + result = result->next_in_collection_set(); + } + return result; +} + +void HRInto_G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) { + double rs_time_start = os::elapsedTime(); + HeapRegion *startRegion = calculateStartRegion(worker_i); + + BufferingOopsInHeapRegionClosure boc(oc); + ScanRSClosure scanRScl(&boc, worker_i); + _g1->collection_set_iterate_from(startRegion, &scanRScl); + scanRScl.set_try_claimed(); + _g1->collection_set_iterate_from(startRegion, &scanRScl); + + boc.done(); + double closure_app_time_sec = boc.closure_app_seconds(); + double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) - + closure_app_time_sec; + double closure_app_time_ms = closure_app_time_sec * 1000.0; + + assert( _cards_scanned != NULL, "invariant" ); + _cards_scanned[worker_i] = scanRScl.cards_done(); + + _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0); + _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0); + if (ParallelGCThreads > 0) { + // In this case, we called scanNewRefsRS and recorded the corresponding + // time. + double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i); + if (scan_new_refs_time_ms > 0.0) { + closure_app_time_ms += scan_new_refs_time_ms; + } + } + _g1p->record_obj_copy_time(worker_i, closure_app_time_ms); +} + +void HRInto_G1RemSet::updateRS(int worker_i) { + ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine(); + + double start = os::elapsedTime(); + _g1p->record_update_rs_start_time(worker_i, start * 1000.0); + + if (G1RSBarrierUseQueue && !cg1r->do_traversal()) { + // Apply the appropriate closure to all remaining log entries. + _g1->iterate_dirty_card_closure(false, worker_i); + // Now there should be no dirty cards. + if (G1RSLogCheckCardTable) { + CountNonCleanMemRegionClosure cl(_g1); + _ct_bs->mod_card_iterate(&cl); + // XXX This isn't true any more: keeping cards of young regions + // marked dirty broke it. Need some reasonable fix. + guarantee(cl.n() == 0, "Card table should be clean."); + } + } else { + UpdateRSOutOfRegionClosure update_rs(_g1, worker_i); + _g1->heap_region_iterate(&update_rs); + // We did a traversal; no further one is necessary. 
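// [Illustrative sketch, not part of this changeset.] The start-region
// partitioning computed by calculateStartRegion() above is just
// "worker t begins at region t * floor(n / p)" over the collection-set list.
// A minimal stand-alone model of that arithmetic in plain standard C++;
// the names below are hypothetical, not HotSpot APIs.
#include <cstdio>
#include <cstddef>

// Index of the first collection-set region worker `t` scans, given `n`
// regions in the collection set and `p` workers.
static size_t start_region_index(size_t t, size_t n, size_t p) {
  return t * (n / p);  // integer division is floor(n / p)
}

int main() {
  // Example: 10 regions, 4 workers -> workers start at regions 0, 2, 4 and 6;
  // each worker then walks the region list from its start point, claiming
  // regions as it goes, so the workers mostly touch disjoint regions first.
  for (size_t t = 0; t < 4; ++t) {
    std::printf("worker %zu starts at region %zu\n",
                t, start_region_index(t, 10, 4));
  }
  return 0;
}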
+ if (G1RSBarrierUseQueue) { + assert(cg1r->do_traversal(), "Or we shouldn't have gotten here."); + cg1r->set_pya_cancel(); + } + if (_cg1r->use_cache()) { + _cg1r->clear_and_record_card_counts(); + _cg1r->clear_hot_cache(); + } + } + _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0); +} + +#ifndef PRODUCT +class PrintRSClosure : public HeapRegionClosure { + int _count; +public: + PrintRSClosure() : _count(0) {} + bool doHeapRegion(HeapRegion* r) { + HeapRegionRemSet* hrrs = r->rem_set(); + _count += (int) hrrs->occupied(); + if (hrrs->occupied() == 0) { + gclog_or_tty->print("Heap Region [" PTR_FORMAT ", " PTR_FORMAT ") " + "has no remset entries\n", + r->bottom(), r->end()); + } else { + gclog_or_tty->print("Printing rem set for heap region [" PTR_FORMAT ", " PTR_FORMAT ")\n", + r->bottom(), r->end()); + r->print(); + hrrs->print(); + gclog_or_tty->print("\nDone printing rem set\n"); + } + return false; + } + int occupied() {return _count;} +}; +#endif + +class CountRSSizeClosure: public HeapRegionClosure { + size_t _n; + size_t _tot; + size_t _max; + HeapRegion* _max_r; + enum { + N = 20, + MIN = 6 + }; + int _histo[N]; +public: + CountRSSizeClosure() : _n(0), _tot(0), _max(0), _max_r(NULL) { + for (int i = 0; i < N; i++) _histo[i] = 0; + } + bool doHeapRegion(HeapRegion* r) { + if (!r->continuesHumongous()) { + size_t occ = r->rem_set()->occupied(); + _n++; + _tot += occ; + if (occ > _max) { + _max = occ; + _max_r = r; + } + // Fit it into a histo bin. + int s = 1 << MIN; + int i = 0; + while (occ > (size_t) s && i < (N-1)) { + s = s << 1; + i++; + } + _histo[i]++; + } + return false; + } + size_t n() { return _n; } + size_t tot() { return _tot; } + size_t mx() { return _max; } + HeapRegion* mxr() { return _max_r; } + void print_histo() { + int mx = N; + while (mx >= 0) { + if (_histo[mx-1] > 0) break; + mx--; + } + gclog_or_tty->print_cr("Number of regions with given RS sizes:"); + gclog_or_tty->print_cr(" <= %8d %8d", 1 << MIN, _histo[0]); + for (int i = 1; i < mx-1; i++) { + gclog_or_tty->print_cr(" %8d - %8d %8d", + (1 << (MIN + i - 1)) + 1, + 1 << (MIN + i), + _histo[i]); + } + gclog_or_tty->print_cr(" > %8d %8d", (1 << (MIN+mx-2))+1, _histo[mx-1]); + } +}; + +void +HRInto_G1RemSet::scanNewRefsRS(OopsInHeapRegionClosure* oc, + int worker_i) { + double scan_new_refs_start_sec = os::elapsedTime(); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set()); + while (_new_refs[worker_i]->is_nonempty()) { + oop* p = _new_refs[worker_i]->pop(); + oop obj = *p; + // *p was in the collection set when p was pushed on "_new_refs", but + // another thread may have processed this location from an RS, so it + // might not point into the CS any longer. If so, it's obviously been + // processed, and we don't need to do anything further. + if (g1h->obj_in_cs(obj)) { + HeapRegion* r = g1h->heap_region_containing(p); + + DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj)); + assert(ParallelGCThreads > 1 + || to->rem_set()->contains_reference(p), + "Invariant: pushed after being added." + "(Not reliable in parallel code.)"); + oc->set_region(r); + // If "p" has already been processed concurrently, this is + // idempotent. 
+ oc->do_oop(p); + } + } + _g1p->record_scan_new_refs_time(worker_i, + (os::elapsedTime() - scan_new_refs_start_sec) + * 1000.0); +} + +void HRInto_G1RemSet::set_par_traversal(bool b) { + _par_traversal_in_progress = b; + HeapRegionRemSet::set_par_traversal(b); +} + +void HRInto_G1RemSet::cleanupHRRS() { + HeapRegionRemSet::cleanup(); +} + +void +HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, + int worker_i) { +#if CARD_REPEAT_HISTO + ct_freq_update_histo_and_reset(); +#endif + if (worker_i == 0) { + _cg1r->clear_and_record_card_counts(); + } + + // Make this into a command-line flag... + if (G1RSCountHisto && (ParallelGCThreads == 0 || worker_i == 0)) { + CountRSSizeClosure count_cl; + _g1->heap_region_iterate(&count_cl); + gclog_or_tty->print_cr("Avg of %d RS counts is %f, max is %d, " + "max region is " PTR_FORMAT, + count_cl.n(), (float)count_cl.tot()/(float)count_cl.n(), + count_cl.mx(), count_cl.mxr()); + count_cl.print_histo(); + } + + if (ParallelGCThreads > 0) { + // This is a temporary change to serialize the update and scanning + // of remembered sets. There are some race conditions when this is + // done in parallel and they are causing failures. When we resolve + // said race conditions, we'll revert back to parallel remembered + // set updating and scanning. See CRs 6677707 and 6677708. + if (worker_i == 0) { + updateRS(worker_i); + scanNewRefsRS(oc, worker_i); + scanRS(oc, worker_i); + } + } else { + assert(worker_i == 0, "invariant"); + + updateRS(0); + scanRS(oc, 0); + } +} + +void HRInto_G1RemSet:: +prepare_for_oops_into_collection_set_do() { +#if G1_REM_SET_LOGGING + PrintRSClosure cl; + _g1->collection_set_iterate(&cl); +#endif + cleanupHRRS(); + ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine(); + _g1->set_refine_cte_cl_concurrency(false); + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + dcqs.concatenate_logs(); + + assert(!_par_traversal_in_progress, "Invariant between iterations."); + if (ParallelGCThreads > 0) { + set_par_traversal(true); + int n_workers = _g1->workers()->total_workers(); + _seq_task->set_par_threads(n_workers); + for (uint i = 0; i < ParallelGCThreads; i++) + _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray(8192,true); + + if (cg1r->do_traversal()) { + updateRS(0); + // Have to do this again after updaters + cleanupHRRS(); + } + } + guarantee( _cards_scanned == NULL, "invariant" ); + _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers()); + _total_cards_scanned = 0; +} + + +class cleanUpIteratorsClosure : public HeapRegionClosure { + bool doHeapRegion(HeapRegion *r) { + HeapRegionRemSet* hrrs = r->rem_set(); + hrrs->init_for_par_iteration(); + return false; + } +}; + +void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() { + guarantee( _cards_scanned != NULL, "invariant" ); + _total_cards_scanned = 0; + for (uint i = 0; i < n_workers(); ++i) + _total_cards_scanned += _cards_scanned[i]; + FREE_C_HEAP_ARRAY(size_t, _cards_scanned); + _cards_scanned = NULL; + // Cleanup after copy +#if G1_REM_SET_LOGGING + PrintRSClosure cl; + _g1->heap_region_iterate(&cl); +#endif + _g1->set_refine_cte_cl_concurrency(true); + cleanUpIteratorsClosure iterClosure; + _g1->collection_set_iterate(&iterClosure); + // Set all cards back to clean. 
+ _g1->cleanUpCardTable(); + if (ParallelGCThreads > 0) { + ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine(); + if (cg1r->do_traversal()) { + cg1r->cg1rThread()->set_do_traversal(false); + } + for (uint i = 0; i < ParallelGCThreads; i++) { + delete _new_refs[i]; + } + set_par_traversal(false); + } + assert(!_par_traversal_in_progress, "Invariant between iterations."); +} + +class UpdateRSObjectClosure: public ObjectClosure { + UpdateRSOopClosure* _update_rs_oop_cl; +public: + UpdateRSObjectClosure(UpdateRSOopClosure* update_rs_oop_cl) : + _update_rs_oop_cl(update_rs_oop_cl) {} + void do_object(oop obj) { + obj->oop_iterate(_update_rs_oop_cl); + } + +}; + +class ScrubRSClosure: public HeapRegionClosure { + G1CollectedHeap* _g1h; + BitMap* _region_bm; + BitMap* _card_bm; + CardTableModRefBS* _ctbs; +public: + ScrubRSClosure(BitMap* region_bm, BitMap* card_bm) : + _g1h(G1CollectedHeap::heap()), + _region_bm(region_bm), _card_bm(card_bm), + _ctbs(NULL) + { + ModRefBarrierSet* bs = _g1h->mr_bs(); + guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition"); + _ctbs = (CardTableModRefBS*)bs; + } + + bool doHeapRegion(HeapRegion* r) { + if (!r->continuesHumongous()) { + r->rem_set()->scrub(_ctbs, _region_bm, _card_bm); + } + return false; + } +}; + +void HRInto_G1RemSet::scrub(BitMap* region_bm, BitMap* card_bm) { + ScrubRSClosure scrub_cl(region_bm, card_bm); + _g1->heap_region_iterate(&scrub_cl); +} + +void HRInto_G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm, + int worker_num, int claim_val) { + ScrubRSClosure scrub_cl(region_bm, card_bm); + _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val); +} + + +class ConcRefineRegionClosure: public HeapRegionClosure { + G1CollectedHeap* _g1h; + CardTableModRefBS* _ctbs; + ConcurrentGCThread* _cgc_thrd; + ConcurrentG1Refine* _cg1r; + unsigned _cards_processed; + UpdateRSOopClosure _update_rs_oop_cl; +public: + ConcRefineRegionClosure(CardTableModRefBS* ctbs, + ConcurrentG1Refine* cg1r, + HRInto_G1RemSet* g1rs) : + _ctbs(ctbs), _cg1r(cg1r), _cgc_thrd(cg1r->cg1rThread()), + _update_rs_oop_cl(g1rs), _cards_processed(0), + _g1h(G1CollectedHeap::heap()) + {} + + bool doHeapRegion(HeapRegion* r) { + if (!r->in_collection_set() && + !r->continuesHumongous() && + !r->is_young()) { + _update_rs_oop_cl.set_from(r); + UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl); + + // For each run of dirty card in the region: + // 1) Clear the cards. + // 2) Process the range corresponding to the run, adding any + // necessary RS entries. + // 1 must precede 2, so that a concurrent modification redirties the + // card. If a processing attempt does not succeed, because it runs + // into an unparseable region, we will do binary search to find the + // beginning of the next parseable region. 
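// [Illustrative sketch, not part of this changeset.] A toy, single-threaded
// model of the ordering the comment above requires: each run of dirty cards
// is reset to clean *before* its range is processed, so a concurrent write
// that lands mid-scan re-dirties the card and is picked up by a later pass
// instead of being lost. Plain standard C++; the card table here is just a
// byte vector and all names are hypothetical, not HotSpot code.
#include <cstdio>
#include <cstdint>
#include <utility>
#include <vector>

static const uint8_t kClean = 0;
static const uint8_t kDirty = 1;

// Find the next run of dirty cards at or after `from`, reset it to clean,
// and return it as [begin, end); begin == end means no dirty cards remain.
static std::pair<size_t, size_t>
claim_dirty_run(std::vector<uint8_t>& cards, size_t from) {
  size_t begin = from;
  while (begin < cards.size() && cards[begin] != kDirty) ++begin;
  size_t end = begin;
  while (end < cards.size() && cards[end] == kDirty) {
    cards[end] = kClean;  // step 1: clear before processing
    ++end;
  }
  return std::make_pair(begin, end);
}

int main() {
  std::vector<uint8_t> cards = {0, 1, 1, 0, 0, 1, 0, 1, 1, 1};
  size_t next = 0;
  for (;;) {
    std::pair<size_t, size_t> run = claim_dirty_run(cards, next);
    if (run.first == run.second) break;  // nothing left to do
    // step 2: process the heap range covered by the run (here, just report).
    std::printf("processing cards [%zu, %zu)\n", run.first, run.second);
    next = run.second;
  }
  return 0;
}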
+ HeapWord* startAddr = r->bottom(); + HeapWord* endAddr = r->used_region().end(); + HeapWord* lastAddr; + HeapWord* nextAddr; + + for (nextAddr = lastAddr = startAddr; + nextAddr < endAddr; + nextAddr = lastAddr) { + MemRegion dirtyRegion; + + // Get and clear dirty region from card table + MemRegion next_mr(nextAddr, endAddr); + dirtyRegion = + _ctbs->dirty_card_range_after_reset( + next_mr, + true, CardTableModRefBS::clean_card_val()); + assert(dirtyRegion.start() >= nextAddr, + "returned region inconsistent?"); + + if (!dirtyRegion.is_empty()) { + HeapWord* stop_point = + r->object_iterate_mem_careful(dirtyRegion, + &update_rs_obj_cl); + if (stop_point == NULL) { + lastAddr = dirtyRegion.end(); + _cards_processed += + (int) (dirtyRegion.word_size() / CardTableModRefBS::card_size_in_words); + } else { + // We're going to skip one or more cards that we can't parse. + HeapWord* next_parseable_card = + r->next_block_start_careful(stop_point); + // Round this up to a card boundary. + next_parseable_card = + _ctbs->addr_for(_ctbs->byte_after_const(next_parseable_card)); + // Now we invalidate the intervening cards so we'll see them + // again. + MemRegion remaining_dirty = + MemRegion(stop_point, dirtyRegion.end()); + MemRegion skipped = + MemRegion(stop_point, next_parseable_card); + _ctbs->invalidate(skipped.intersection(remaining_dirty)); + + // Now start up again where we can parse. + lastAddr = next_parseable_card; + + // Count how many we did completely. + _cards_processed += + (stop_point - dirtyRegion.start()) / + CardTableModRefBS::card_size_in_words; + } + // Allow interruption at regular intervals. + // (Might need to make them more regular, if we get big + // dirty regions.) + if (_cgc_thrd != NULL) { + if (_cgc_thrd->should_yield()) { + _cgc_thrd->yield(); + switch (_cg1r->get_pya()) { + case PYA_continue: + // This may have changed: re-read. + endAddr = r->used_region().end(); + continue; + case PYA_restart: case PYA_cancel: + return true; + } + } + } + } else { + break; + } + } + } + // A good yield opportunity. + if (_cgc_thrd != NULL) { + if (_cgc_thrd->should_yield()) { + _cgc_thrd->yield(); + switch (_cg1r->get_pya()) { + case PYA_restart: case PYA_cancel: + return true; + default: + break; + } + + } + } + return false; + } + + unsigned cards_processed() { return _cards_processed; } +}; + + +void HRInto_G1RemSet::concurrentRefinementPass(ConcurrentG1Refine* cg1r) { + ConcRefineRegionClosure cr_cl(ct_bs(), cg1r, this); + _g1->heap_region_iterate(&cr_cl); + _conc_refine_traversals++; + _conc_refine_cards += cr_cl.cards_processed(); +} + +static IntHistogram out_of_histo(50, 50); + + + +void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) { + // If the card is no longer dirty, nothing to do. + if (*card_ptr != CardTableModRefBS::dirty_card_val()) return; + + // Construct the region representing the card. + HeapWord* start = _ct_bs->addr_for(card_ptr); + // And find the region containing it. + HeapRegion* r = _g1->heap_region_containing(start); + if (r == NULL) { + guarantee(_g1->is_in_permanent(start), "Or else where?"); + return; // Not in the G1 heap (might be in perm, for example.) + } + // Why do we have to check here whether a card is on a young region, + // given that we dirty young regions and, as a result, the + // post-barrier is supposed to filter them out and never to enqueue + // them? 
When we allocate a new region as the "allocation region" we
+  // actually dirty its cards after we release the lock, since card
+  // dirtying while holding the lock was a performance bottleneck. So,
+  // as a result, it is possible for other threads to actually
+  // allocate objects in the region (after they acquire the lock)
+  // before all the cards on the region are dirtied. This is unlikely,
+  // and it doesn't happen often, but it can happen. So, the extra
+  // check below filters out those cards.
+  if (r->is_young()) {
+    return;
+  }
+  // While we are processing RSet buffers during the collection, we
+  // actually don't want to scan any cards on the collection set,
+  // since we don't want to update remembered sets with entries that
+  // point into the collection set, given that live objects from the
+  // collection set are about to move and such entries will be stale
+  // very soon. This change also deals with a reliability issue which
+  // involves scanning a card in the collection set and coming across
+  // an array that was being chunked and looking malformed. Note,
+  // however, that if evacuation fails, we have to scan any objects
+  // that were not moved and create any missing entries.
+  if (r->in_collection_set()) {
+    return;
+  }
+
+  // Should we defer it?
+  if (_cg1r->use_cache()) {
+    card_ptr = _cg1r->cache_insert(card_ptr);
+    // If it was not an eviction, nothing to do.
+    if (card_ptr == NULL) return;
+
+    // OK, we have to reset the card start, region, etc.
+    start = _ct_bs->addr_for(card_ptr);
+    r = _g1->heap_region_containing(start);
+    if (r == NULL) {
+      guarantee(_g1->is_in_permanent(start), "Or else where?");
+      return; // Not in the G1 heap (might be in perm, for example.)
+    }
+    guarantee(!r->is_young(), "It was evicted in the current minor cycle.");
+  }
+
+  HeapWord* end = _ct_bs->addr_for(card_ptr + 1);
+  MemRegion dirtyRegion(start, end);
+
+#if CARD_REPEAT_HISTO
+  init_ct_freq_table(_g1->g1_reserved_obj_bytes());
+  ct_freq_note_card(_ct_bs->index_for(start));
+#endif
+
+  UpdateRSOopClosure update_rs_oop_cl(this, worker_i);
+  update_rs_oop_cl.set_from(r);
+  FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl);
+
+  // Undirty the card.
+  *card_ptr = CardTableModRefBS::clean_card_val();
+  // We must complete this write before we do any of the reads below.
+  OrderAccess::storeload();
+  // And process it, being careful of unallocated portions of TLAB's.
+  HeapWord* stop_point =
+    r->oops_on_card_seq_iterate_careful(dirtyRegion,
+                                        &filter_then_update_rs_oop_cl);
+  // If stop_point is non-null, then we encountered an unallocated region
+  // (perhaps the unfilled portion of a TLAB.) For now, we'll dirty the
+  // card and re-enqueue: if we put off the card until a GC pause, then the
+  // unallocated portion will be filled in. Alternatively, we might try
+  // the full complexity of the technique used in "regular" precleaning.
+  if (stop_point != NULL) {
+    // The card might have gotten re-dirtied and re-enqueued while we
+    // worked. (In fact, it's pretty likely.)
+ if (*card_ptr != CardTableModRefBS::dirty_card_val()) { + *card_ptr = CardTableModRefBS::dirty_card_val(); + MutexLockerEx x(Shared_DirtyCardQ_lock, + Mutex::_no_safepoint_check_flag); + DirtyCardQueue* sdcq = + JavaThread::dirty_card_queue_set().shared_dirty_card_queue(); + sdcq->enqueue(card_ptr); + } + } else { + out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region()); + _conc_refine_cards++; + } +} + +class HRRSStatsIter: public HeapRegionClosure { + size_t _occupied; + size_t _total_mem_sz; + size_t _max_mem_sz; + HeapRegion* _max_mem_sz_region; +public: + HRRSStatsIter() : + _occupied(0), + _total_mem_sz(0), + _max_mem_sz(0), + _max_mem_sz_region(NULL) + {} + + bool doHeapRegion(HeapRegion* r) { + if (r->continuesHumongous()) return false; + size_t mem_sz = r->rem_set()->mem_size(); + if (mem_sz > _max_mem_sz) { + _max_mem_sz = mem_sz; + _max_mem_sz_region = r; + } + _total_mem_sz += mem_sz; + size_t occ = r->rem_set()->occupied(); + _occupied += occ; + return false; + } + size_t total_mem_sz() { return _total_mem_sz; } + size_t max_mem_sz() { return _max_mem_sz; } + size_t occupied() { return _occupied; } + HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; } +}; + +void HRInto_G1RemSet::print_summary_info() { + G1CollectedHeap* g1 = G1CollectedHeap::heap(); + ConcurrentG1RefineThread* cg1r_thrd = + g1->concurrent_g1_refine()->cg1rThread(); + +#if CARD_REPEAT_HISTO + gclog_or_tty->print_cr("\nG1 card_repeat count histogram: "); + gclog_or_tty->print_cr(" # of repeats --> # of cards with that number."); + card_repeat_count.print_on(gclog_or_tty); +#endif + + if (FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT) { + gclog_or_tty->print_cr("\nG1 rem-set out-of-region histogram: "); + gclog_or_tty->print_cr(" # of CS ptrs --> # of cards with that number."); + out_of_histo.print_on(gclog_or_tty); + } + gclog_or_tty->print_cr("\n Concurrent RS processed %d cards in " + "%5.2fs.", + _conc_refine_cards, cg1r_thrd->vtime_accum()); + + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + jint tot_processed_buffers = + dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread(); + gclog_or_tty->print_cr(" Of %d completed buffers:", tot_processed_buffers); + gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS thread.", + dcqs.processed_buffers_rs_thread(), + 100.0*(float)dcqs.processed_buffers_rs_thread()/ + (float)tot_processed_buffers); + gclog_or_tty->print_cr(" %8d (%5.1f%%) by mutator threads.", + dcqs.processed_buffers_mut(), + 100.0*(float)dcqs.processed_buffers_mut()/ + (float)tot_processed_buffers); + gclog_or_tty->print_cr(" Did %d concurrent refinement traversals.", + _conc_refine_traversals); + if (!G1RSBarrierUseQueue) { + gclog_or_tty->print_cr(" Scanned %8.2f cards/traversal.", + _conc_refine_traversals > 0 ? + (float)_conc_refine_cards/(float)_conc_refine_traversals : + 0); + } + gclog_or_tty->print_cr(""); + if (G1UseHRIntoRS) { + HRRSStatsIter blk; + g1->heap_region_iterate(&blk); + gclog_or_tty->print_cr(" Total heap region rem set sizes = " SIZE_FORMAT "K." 
+ " Max = " SIZE_FORMAT "K.", + blk.total_mem_sz()/K, blk.max_mem_sz()/K); + gclog_or_tty->print_cr(" Static structures = " SIZE_FORMAT "K," + " free_lists = " SIZE_FORMAT "K.", + HeapRegionRemSet::static_mem_size()/K, + HeapRegionRemSet::fl_mem_size()/K); + gclog_or_tty->print_cr(" %d occupied cards represented.", + blk.occupied()); + gclog_or_tty->print_cr(" Max sz region = [" PTR_FORMAT ", " PTR_FORMAT " )" + " %s, cap = " SIZE_FORMAT "K, occ = " SIZE_FORMAT "K.", + blk.max_mem_sz_region()->bottom(), blk.max_mem_sz_region()->end(), + (blk.max_mem_sz_region()->popular() ? "POP" : ""), + (blk.max_mem_sz_region()->rem_set()->mem_size() + K - 1)/K, + (blk.max_mem_sz_region()->rem_set()->occupied() + K - 1)/K); + gclog_or_tty->print_cr(" Did %d coarsenings.", + HeapRegionRemSet::n_coarsenings()); + + } +} +void HRInto_G1RemSet::prepare_for_verify() { + if (G1HRRSFlushLogBuffersOnVerify && VerifyBeforeGC && !_g1->full_collection()) { + cleanupHRRS(); + _g1->set_refine_cte_cl_concurrency(false); + if (SafepointSynchronize::is_at_safepoint()) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + dcqs.concatenate_logs(); + } + bool cg1r_use_cache = _cg1r->use_cache(); + _cg1r->set_use_cache(false); + updateRS(0); + _cg1r->set_use_cache(cg1r_use_cache); + } +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1RemSet.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,216 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// A G1RemSet provides ways of iterating over pointers into a selected +// collection set. + +class G1CollectedHeap; +class CardTableModRefBarrierSet; +class HRInto_G1RemSet; +class ConcurrentG1Refine; + +class G1RemSet { +protected: + G1CollectedHeap* _g1; + + unsigned _conc_refine_traversals; + unsigned _conc_refine_cards; + + size_t n_workers(); + +public: + G1RemSet(G1CollectedHeap* g1) : + _g1(g1), _conc_refine_traversals(0), _conc_refine_cards(0) + {} + + // Invoke "blk->do_oop" on all pointers into the CS in object in regions + // outside the CS (having invoked "blk->set_region" to set the "from" + // region correctly beforehand.) The "worker_i" param is for the + // parallel case where the number of the worker thread calling this + // function can be helpful in partitioning the work to be done. It + // should be the same as the "i" passed to the calling thread's + // work(i) function. 
In the sequential case this param will be ignored.
+  virtual void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+                                           int worker_i) = 0;
+
+  // Prepare for and cleanup after an oops_into_collection_set_do
+  // call. Must call each of these once before and after (in sequential
+  // code) any threads call oops into collection set do. (This offers an
+  // opportunity for sequential setup and teardown of structures needed by a
+  // parallel iteration over the CS's RS.)
+  virtual void prepare_for_oops_into_collection_set_do() = 0;
+  virtual void cleanup_after_oops_into_collection_set_do() = 0;
+
+  // If "this" is of the given subtype, return "this", else "NULL".
+  virtual HRInto_G1RemSet* as_HRInto_G1RemSet() { return NULL; }
+
+  // Record, if necessary, the fact that *p (where "p" is in region "from")
+  // has changed to its new value.
+  virtual void write_ref(HeapRegion* from, oop* p) = 0;
+  virtual void par_write_ref(HeapRegion* from, oop* p, int tid) = 0;
+
+  // Requires "region_bm" and "card_bm" to be bitmaps with 1 bit per region
+  // or card, respectively, such that a region or card with a corresponding
+  // 0 bit contains no part of any live object. Eliminates any remembered
+  // set entries that correspond to dead heap ranges.
+  virtual void scrub(BitMap* region_bm, BitMap* card_bm) = 0;
+  // Like the above, but assumes it is called in parallel: "worker_num" is the
+  // parallel thread id of the current thread, and "claim_val" is the
+  // value that should be used to claim heap regions.
+  virtual void scrub_par(BitMap* region_bm, BitMap* card_bm,
+                         int worker_num, int claim_val) = 0;
+
+  // Do any "refinement" activity that might be appropriate to the given
+  // G1RemSet. If "refinement" has iterative "passes", do one pass.
+  // If "t" is non-NULL, it is the thread performing the refinement.
+  // Default implementation does nothing.
+  virtual void concurrentRefinementPass(ConcurrentG1Refine* cg1r) {}
+
+  // Refine the card corresponding to "card_ptr". If "sts" is non-NULL,
+  // join and leave around parts that must be atomic wrt GC. (NULL means
+  // being done at a safepoint.)
+  virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {}
+
+  unsigned conc_refine_cards() { return _conc_refine_cards; }
+
+  // Print any relevant summary info.
+  virtual void print_summary_info() {}
+
+  // Prepare remembered set for verification.
+  virtual void prepare_for_verify() {};
+};
+
+
+// The simplest possible G1RemSet: iterates over all objects in non-CS
+// regions, searching for pointers into the CS.
+class StupidG1RemSet: public G1RemSet {
+public:
+  StupidG1RemSet(G1CollectedHeap* g1) : G1RemSet(g1) {}
+
+  void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+                                   int worker_i);
+
+  void prepare_for_oops_into_collection_set_do() {}
+  void cleanup_after_oops_into_collection_set_do() {}
+
+  // Nothing is necessary in the version below.
+  void write_ref(HeapRegion* from, oop* p) {}
+  void par_write_ref(HeapRegion* from, oop* p, int tid) {}
+
+  void scrub(BitMap* region_bm, BitMap* card_bm) {}
+  void scrub_par(BitMap* region_bm, BitMap* card_bm,
+                 int worker_num, int claim_val) {}
+
+};
+
+// A G1RemSet in which each heap region has a rem set that records the
+// external heap references into it. Uses a mod ref bs to track updates,
+// so that they can be used to update the individual region remsets.
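// [Illustrative sketch, not part of this changeset.] A toy model of the
// "each region has a rem set recording the external references into it"
// idea described above: on a cross-region pointer store, the address of the
// updated field is recorded in the *destination* region's set, so that at
// evacuation time only those fields need to be revisited. Plain standard
// C++ with hypothetical names; the real code uses HeapRegionRemSet, not
// std::set.
#include <cstdio>
#include <set>

struct ToyRegion {
  int id;
  std::set<const void*> incoming_fields;  // fields outside this region that point in
};

// Post-write-barrier model, called after "*field" is set to point into `to`.
static void record_reference(ToyRegion* from, ToyRegion* to, const void* field) {
  if (to != NULL && from != to) {  // same-region stores need no rem set entry
    to->incoming_fields.insert(field);
  }
}

int main() {
  ToyRegion a = {0, std::set<const void*>()};
  ToyRegion b = {1, std::set<const void*>()};
  int field_in_a = 0;  // stands in for a reference field located in region a
  record_reference(&a, &b, &field_in_a);  // that field now points into region b
  std::printf("region %d has %zu incoming field(s) recorded\n",
              b.id, b.incoming_fields.size());
  return 0;
}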
+ +class HRInto_G1RemSet: public G1RemSet { +protected: + enum SomePrivateConstants { + UpdateRStoMergeSync = 0, + MergeRStoDoDirtySync = 1, + DoDirtySync = 2, + LastSync = 3, + + SeqTask = 0, + NumSeqTasks = 1 + }; + + CardTableModRefBS* _ct_bs; + SubTasksDone* _seq_task; + G1CollectorPolicy* _g1p; + + ConcurrentG1Refine* _cg1r; + + size_t* _cards_scanned; + size_t _total_cards_scanned; + + // _par_traversal_in_progress is "true" iff a parallel traversal is in + // progress. If so, then cards added to remembered sets should also have + // their references into the collection summarized in "_new_refs". + bool _par_traversal_in_progress; + void set_par_traversal(bool b); + GrowableArray** _new_refs; + +public: + // This is called to reset dual hash tables after the gc pause + // is finished and the initial hash table is no longer being + // scanned. + void cleanupHRRS(); + + HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs); + ~HRInto_G1RemSet(); + + void oops_into_collection_set_do(OopsInHeapRegionClosure* blk, + int worker_i); + + void prepare_for_oops_into_collection_set_do(); + void cleanup_after_oops_into_collection_set_do(); + void scanRS(OopsInHeapRegionClosure* oc, int worker_i); + void scanNewRefsRS(OopsInHeapRegionClosure* oc, int worker_i); + void updateRS(int worker_i); + HeapRegion* calculateStartRegion(int i); + + HRInto_G1RemSet* as_HRInto_G1RemSet() { return this; } + + CardTableModRefBS* ct_bs() { return _ct_bs; } + size_t cardsScanned() { return _total_cards_scanned; } + + // Record, if necessary, the fact that *p (where "p" is in region "from", + // which is required to be non-NULL) has changed to a new non-NULL value. + inline void write_ref(HeapRegion* from, oop* p); + // The "_nv" version is the same; it exists just so that it is not virtual. + inline void write_ref_nv(HeapRegion* from, oop* p); + + inline bool self_forwarded(oop obj); + inline void par_write_ref(HeapRegion* from, oop* p, int tid); + + void scrub(BitMap* region_bm, BitMap* card_bm); + void scrub_par(BitMap* region_bm, BitMap* card_bm, + int worker_num, int claim_val); + + virtual void concurrentRefinementPass(ConcurrentG1Refine* t); + virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i); + + virtual void print_summary_info(); + virtual void prepare_for_verify(); +}; + +#define G1_REM_SET_LOGGING 0 + +class CountNonCleanMemRegionClosure: public MemRegionClosure { + G1CollectedHeap* _g1; + int _n; + HeapWord* _start_first; +public: + CountNonCleanMemRegionClosure(G1CollectedHeap* g1) : + _g1(g1), _n(0), _start_first(NULL) + {} + void do_MemRegion(MemRegion mr); + int n() { return _n; }; + HeapWord* start_first() { return _start_first; } +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,104 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +inline size_t G1RemSet::n_workers() { + if (_g1->workers() != NULL) { + return _g1->workers()->total_workers(); + } else { + return 1; + } +} + +inline void HRInto_G1RemSet::write_ref_nv(HeapRegion* from, oop* p) { + oop obj = *p; + assert(from != NULL && from->is_in_reserved(p), + "p is not in a from"); + HeapRegion* to = _g1->heap_region_containing(obj); + if (from != to && to != NULL) { + if (!to->popular() && !from->is_survivor()) { +#if G1_REM_SET_LOGGING + gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS" + " for region [" PTR_FORMAT ", " PTR_FORMAT ")", + p, obj, + to->bottom(), to->end()); +#endif + assert(to->rem_set() != NULL, "Need per-region 'into' remsets."); + if (to->rem_set()->add_reference(p)) { + _g1->schedule_popular_region_evac(to); + } + } + } +} + +inline void HRInto_G1RemSet::write_ref(HeapRegion* from, oop* p) { + write_ref_nv(from, p); +} + +inline bool HRInto_G1RemSet::self_forwarded(oop obj) { + bool result = (obj->is_forwarded() && (obj->forwardee()== obj)); + return result; +} + +inline void HRInto_G1RemSet::par_write_ref(HeapRegion* from, oop* p, int tid) { + oop obj = *p; +#ifdef ASSERT + // can't do because of races + // assert(obj == NULL || obj->is_oop(), "expected an oop"); + + // Do the safe subset of is_oop + if (obj != NULL) { +#ifdef CHECK_UNHANDLED_OOPS + oopDesc* o = obj.obj(); +#else + oopDesc* o = obj; +#endif // CHECK_UNHANDLED_OOPS + assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned"); + assert(Universe::heap()->is_in_reserved(obj), "must be in heap"); + } +#endif // ASSERT + assert(from == NULL || from->is_in_reserved(p), + "p is not in from"); + HeapRegion* to = _g1->heap_region_containing(obj); + // The test below could be optimized by applying a bit op to to and from. + if (to != NULL && from != NULL && from != to) { + if (!to->popular() && !from->is_survivor()) { +#if G1_REM_SET_LOGGING + gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS" + " for region [" PTR_FORMAT ", " PTR_FORMAT ")", + p, obj, + to->bottom(), to->end()); +#endif + assert(to->rem_set() != NULL, "Need per-region 'into' remsets."); + if (to->rem_set()->add_reference(p, tid)) { + _g1->schedule_popular_region_evac(to); + } + } + // There is a tricky infinite loop if we keep pushing + // self forwarding pointers onto our _new_refs list. + if (_par_traversal_in_progress && + to->in_collection_set() && !self_forwarded(obj)) { + _new_refs[tid]->push(p); + } + } +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,150 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1SATBCardTableModRefBS.cpp.incl" + +G1SATBCardTableModRefBS::G1SATBCardTableModRefBS(MemRegion whole_heap, + int max_covered_regions) : + CardTableModRefBSForCTRS(whole_heap, max_covered_regions) +{ + _kind = G1SATBCT; +} + + +void G1SATBCardTableModRefBS::enqueue(oop pre_val) { + if (!JavaThread::satb_mark_queue_set().active()) return; + Thread* thr = Thread::current(); + if (thr->is_Java_thread()) { + JavaThread* jt = (JavaThread*)thr; + jt->satb_mark_queue().enqueue(pre_val); + } else { + MutexLocker x(Shared_SATB_Q_lock); + JavaThread::satb_mark_queue_set().shared_satb_queue()->enqueue(pre_val); + } +} + +// When we know the current java thread: +void +G1SATBCardTableModRefBS::write_ref_field_pre_static(void* field, + oop newVal, + JavaThread* jt) { + if (!JavaThread::satb_mark_queue_set().active()) return; + assert(!UseCompressedOops, "Else will need to modify this to deal with narrowOop"); + oop preVal = *(oop*)field; + if (preVal != NULL) { + jt->satb_mark_queue().enqueue(preVal); + } +} + +void +G1SATBCardTableModRefBS::write_ref_array_pre(MemRegion mr) { + if (!JavaThread::satb_mark_queue_set().active()) return; + assert(!UseCompressedOops, "Else will need to modify this to deal with narrowOop"); + oop* elem_ptr = (oop*)mr.start(); + while ((HeapWord*)elem_ptr < mr.end()) { + oop elem = *elem_ptr; + if (elem != NULL) enqueue(elem); + elem_ptr++; + } +} + + + +G1SATBCardTableLoggingModRefBS:: +G1SATBCardTableLoggingModRefBS(MemRegion whole_heap, + int max_covered_regions) : + G1SATBCardTableModRefBS(whole_heap, max_covered_regions), + _dcqs(JavaThread::dirty_card_queue_set()) +{ + _kind = G1SATBCTLogging; +} + +void +G1SATBCardTableLoggingModRefBS::write_ref_field_work(void* field, + oop new_val) { + jbyte* byte = byte_for(field); + if (*byte != dirty_card) { + *byte = dirty_card; + Thread* thr = Thread::current(); + if (thr->is_Java_thread()) { + JavaThread* jt = (JavaThread*)thr; + jt->dirty_card_queue().enqueue(byte); + } else { + MutexLockerEx x(Shared_DirtyCardQ_lock, + Mutex::_no_safepoint_check_flag); + _dcqs.shared_dirty_card_queue()->enqueue(byte); + } + } +} + +void +G1SATBCardTableLoggingModRefBS::write_ref_field_static(void* field, + oop new_val) { + uintptr_t field_uint = (uintptr_t)field; + uintptr_t new_val_uint = (uintptr_t)new_val; + uintptr_t comb = field_uint ^ new_val_uint; + comb = comb >> HeapRegion::LogOfHRGrainBytes; + if (comb == 0) return; + if (new_val == NULL) return; + // Otherwise, log it. 
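// [Illustrative sketch, not part of this changeset.] The filter a few lines
// above ("comb = field_uint ^ new_val_uint; comb >> LogOfHRGrainBytes")
// relies on heap regions being power-of-two sized and aligned: two addresses
// lie in the same region exactly when they agree in every bit above the
// region-size bits, i.e. when the shifted xor is zero. Stand-alone model in
// plain C++; the 1 MB region size and all names are assumptions made only
// for this example.
#include <cstdint>
#include <cstdio>

static const unsigned kLogRegionBytes = 20;  // assume 1 MB regions

static bool same_region(uintptr_t field_addr, uintptr_t new_val_addr) {
  return ((field_addr ^ new_val_addr) >> kLogRegionBytes) == 0;
}

int main() {
  uintptr_t base = (uintptr_t)7 << kLogRegionBytes;  // some region-aligned address
  // Same region: no rem set update would be needed.
  std::printf("%d\n", (int)same_region(base + 0x10, base + 0x8000));
  // Next region over: the store crosses regions and must be logged.
  std::printf("%d\n", (int)same_region(base + 0x10,
                                       base + ((uintptr_t)1 << kLogRegionBytes)));
  return 0;
}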
+ G1SATBCardTableLoggingModRefBS* g1_bs = + (G1SATBCardTableLoggingModRefBS*)Universe::heap()->barrier_set(); + g1_bs->write_ref_field_work(field, new_val); +} + +void +G1SATBCardTableLoggingModRefBS::invalidate(MemRegion mr, bool whole_heap) { + jbyte* byte = byte_for(mr.start()); + jbyte* last_byte = byte_for(mr.last()); + Thread* thr = Thread::current(); + if (whole_heap) { + while (byte <= last_byte) { + *byte = dirty_card; + byte++; + } + } else { + // Enqueue if necessary. + if (thr->is_Java_thread()) { + JavaThread* jt = (JavaThread*)thr; + while (byte <= last_byte) { + if (*byte != dirty_card) { + *byte = dirty_card; + jt->dirty_card_queue().enqueue(byte); + } + byte++; + } + } else { + MutexLockerEx x(Shared_DirtyCardQ_lock, + Mutex::_no_safepoint_check_flag); + while (byte <= last_byte) { + if (*byte != dirty_card) { + *byte = dirty_card; + _dcqs.shared_dirty_card_queue()->enqueue(byte); + } + byte++; + } + } + } +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,107 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#ifndef SERIALGC + +class DirtyCardQueueSet; + +// This barrier is specialized to use a logging barrier to support +// snapshot-at-the-beginning marking. + +class G1SATBCardTableModRefBS: public CardTableModRefBSForCTRS { +private: + // Add "pre_val" to a set of objects that may have been disconnected from the + // pre-marking object graph. + static void enqueue(oop pre_val); + +public: + G1SATBCardTableModRefBS(MemRegion whole_heap, + int max_covered_regions); + + bool is_a(BarrierSet::Name bsn) { + return bsn == BarrierSet::G1SATBCT || CardTableModRefBS::is_a(bsn); + } + + virtual bool has_write_ref_pre_barrier() { return true; } + + // This notes that we don't need to access any BarrierSet data + // structures, so this can be called from a static context. + static void write_ref_field_pre_static(void* field, oop newVal) { + assert(!UseCompressedOops, "Else needs to be templatized"); + oop preVal = *((oop*)field); + if (preVal != NULL) { + enqueue(preVal); + } + } + + // When we know the current java thread: + static void write_ref_field_pre_static(void* field, oop newVal, + JavaThread* jt); + + // We export this to make it available in cases where the static + // type of the barrier set is known. Note that it is non-virtual. 
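// [Illustrative sketch, not part of this changeset.] The pre-barrier
// declared just above records the *previous* value of a reference field
// before it is overwritten. That is what lets concurrent marking behave as
// if it ran against the snapshot taken at marking start (SATB): an object
// whose last incoming reference is overwritten during marking is still
// enqueued and eventually visited. Toy single-threaded model in plain
// standard C++ with hypothetical names; it is not the HotSpot queue
// machinery.
#include <cstdio>
#include <vector>

struct ToyObj { const char* name; };

static bool marking_active = true;
static std::vector<ToyObj*> satb_queue;  // stands in for the SATB mark queue

// Model of the reference store "*field = new_val" with an SATB pre-barrier.
static void write_ref_with_pre_barrier(ToyObj** field, ToyObj* new_val) {
  if (marking_active) {
    ToyObj* pre_val = *field;  // the value about to be overwritten
    if (pre_val != NULL) {
      satb_queue.push_back(pre_val);  // keep it visible to the marker
    }
  }
  *field = new_val;  // the actual store
}

int main() {
  ToyObj old_target = {"old"};
  ToyObj new_target = {"new"};
  ToyObj* field = &old_target;
  write_ref_with_pre_barrier(&field, &new_target);
  std::printf("queued %zu pre-value(s); first is '%s'\n",
              satb_queue.size(), satb_queue.empty() ? "-" : satb_queue[0]->name);
  return 0;
}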
+ inline void inline_write_ref_field_pre(void* field, oop newVal) { + write_ref_field_pre_static(field, newVal); + } + + // This is the more general virtual version. + void write_ref_field_pre_work(void* field, oop new_val) { + inline_write_ref_field_pre(field, new_val); + } + + virtual void write_ref_array_pre(MemRegion mr); + +}; + +// Adds card-table logging to the post-barrier. +// Usual invariant: all dirty cards are logged in the DirtyCardQueueSet. +class G1SATBCardTableLoggingModRefBS: public G1SATBCardTableModRefBS { + private: + DirtyCardQueueSet& _dcqs; + public: + G1SATBCardTableLoggingModRefBS(MemRegion whole_heap, + int max_covered_regions); + + bool is_a(BarrierSet::Name bsn) { + return bsn == BarrierSet::G1SATBCTLogging || + G1SATBCardTableModRefBS::is_a(bsn); + } + + void write_ref_field_work(void* field, oop new_val); + + // Can be called from static contexts. + static void write_ref_field_static(void* field, oop new_val); + + // NB: if you do a whole-heap invalidation, the "usual invariant" defined + // above no longer applies. + void invalidate(MemRegion mr, bool whole_heap = false); + + void write_region_work(MemRegion mr) { invalidate(mr); } + void write_ref_array_work(MemRegion mr) { invalidate(mr); } + + +}; + + +#endif // SERIALGC diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1_globals.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1_globals.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,31 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1_globals.cpp.incl" + +G1_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, \ + MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, \ + MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_NOTPRODUCT_FLAG, \ + MATERIALIZE_MANAGEABLE_FLAG, MATERIALIZE_PRODUCT_RW_FLAG) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1_globals.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,287 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// +// Defines all globals flags used by the garbage-first compiler. +// + +#define G1_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct, manageable, product_rw) \ + \ + product(intx, ParallelGCG1AllocBufferSize, 4*K, \ + "Size of parallel G1 allocation buffers in to-space.") \ + \ + product(intx, G1TimeSliceMS, 500, \ + "Time slice for MMU specification") \ + \ + product(intx, G1MaxPauseTimeMS, 200, \ + "Max GC time per MMU time slice") \ + \ + product(intx, G1ConfidencePerc, 50, \ + "Confidence level for MMU/pause predictions") \ + \ + product(intx, G1MarkingOverheadPerc, 0, \ + "Overhead of concurrent marking") \ + \ + product(bool, G1AccountConcurrentOverhead, false, \ + "Whether soft real-time compliance in G1 will take into account" \ + "concurrent overhead") \ + \ + product(intx, G1YoungGenSize, 0, \ + "Size of the G1 young generation, 0 is the adaptive policy") \ + \ + product(bool, G1Gen, true, \ + "If true, it will enable the generational G1") \ + \ + develop(intx, G1GCPct, 10, \ + "The desired percent time spent on GC") \ + \ + product(intx, G1PolicyVerbose, 0, \ + "The verbosity level on G1 policy decisions") \ + \ + develop(bool, G1UseHRIntoRS, true, \ + "Determines whether the 'advanced' HR Into rem set is used.") \ + \ + product(bool, G1VerifyRemSet, false, \ + "If true, verify the rem set functioning at each GC") \ + \ + product(bool, G1VerifyConcMark, false, \ + "If true, verify the conc marking code at full GC time") \ + \ + develop(intx, G1MarkingVerboseLevel, 0, \ + "Level (0-4) of verboseness of the marking code") \ + \ + develop(bool, G1VerifyConcMarkPrintReachable, true, \ + "If conc mark verification fails, print reachable objects") \ + \ + develop(bool, G1TraceMarkStackOverflow, false, \ + "If true, extra debugging code for CM restart for ovflw.") \ + \ + product(bool, G1VerifyMarkingInEvac, false, \ + "If true, verify marking info during evacuation") \ + \ + develop(intx, G1PausesBtwnConcMark, -1, \ + "If positive, fixed number of pauses between conc markings") \ + \ + product(intx, G1EfficiencyPctCausesMark, 80, \ + "The cum gc efficiency since mark fall-off that causes " \ + "new marking") \ + \ + product(bool, TraceConcurrentMark, false, \ + "Trace concurrent mark") \ + \ + product(bool, SummarizeG1ConcMark, false, \ + "Summarize concurrent mark info") \ + \ + product(bool, SummarizeG1RSStats, false, \ + "Summarize remembered set processing info") \ + \ + product(bool, SummarizeG1ZFStats, false, \ + "Summarize zero-filling info") \ + \ + product(bool, TraceG1Refine, false, \ + "Trace G1 concurrent refinement") \ + \ + develop(bool, G1ConcMark, true, \ + "If true, run concurrent marking for G1") \ + \ + product(intx, G1CMStackSize, 2 * 1024 * 1024, \ + "Size of the mark stack for concurrent marking.") \ + \ 
+ product(intx, G1CMRegionStackSize, 1024 * 1024, \ + "Size of the region stack for concurrent marking.") \ + \ + develop(bool, G1ConcRefine, true, \ + "If true, run concurrent rem set refinement for G1") \ + \ + develop(intx, G1ConcRefineTargTraversals, 4, \ + "Number of concurrent refinement we try to achieve") \ + \ + develop(intx, G1ConcRefineInitialDelta, 4, \ + "Number of heap regions of alloc ahead of starting collection " \ + "pause to start concurrent refinement (initially)") \ + \ + product(bool, G1SmoothConcRefine, true, \ + "Attempts to smooth out the overhead of concurrent refinement") \ + \ + develop(bool, G1ConcZeroFill, true, \ + "If true, run concurrent zero-filling thread") \ + \ + develop(intx, G1ConcZFMaxRegions, 1, \ + "Stop zero-filling when # of zf'd regions reaches") \ + \ + product(intx, G1SteadyStateUsed, 90, \ + "If non-0, try to maintain 'used' at this pct (of max)") \ + \ + product(intx, G1SteadyStateUsedDelta, 30, \ + "If G1SteadyStateUsed is non-0, then do pause this number of " \ + "of percentage points earlier if no marking is in progress.") \ + \ + develop(bool, G1SATBBarrierPrintNullPreVals, false, \ + "If true, count frac of ptr writes with null pre-vals.") \ + \ + product(intx, G1SATBLogBufferSize, 1*K, \ + "Number of entries in an SATB log buffer.") \ + \ + product(intx, G1SATBProcessCompletedThreshold, 20, \ + "Number of completed buffers that triggers log processing.") \ + \ + develop(intx, G1ExtraRegionSurvRate, 33, \ + "If the young survival rate is S, and there's room left in " \ + "to-space, we will allow regions whose survival rate is up to " \ + "S + (1 - S)*X, where X is this parameter (as a fraction.)") \ + \ + develop(intx, G1InitYoungSurvRatio, 50, \ + "Expected Survival Rate for newly allocated bytes") \ + \ + develop(bool, G1SATBPrintStubs, false, \ + "If true, print generated stubs for the SATB barrier") \ + \ + product(intx, G1ExpandByPctOfAvail, 20, \ + "When expanding, % of uncommitted space to claim.") \ + \ + develop(bool, G1RSBarrierRegionFilter, true, \ + "If true, generate region filtering code in RS barrier") \ + \ + develop(bool, G1RSBarrierNullFilter, true, \ + "If true, generate null-pointer filtering code in RS barrier") \ + \ + develop(bool, G1PrintCTFilterStats, false, \ + "If true, print stats on RS filtering effectiveness") \ + \ + develop(bool, G1RSBarrierUseQueue, true, \ + "If true, use queueing RS barrier") \ + \ + develop(bool, G1RSLogCheckCardTable, false, \ + "If true, verify that no dirty cards remain after RS log " \ + "processing.") \ + \ + product(intx, G1MinPausesBetweenMarks, 2, \ + "Number of inefficient pauses necessary to trigger marking.") \ + \ + product(intx, G1InefficientPausePct, 80, \ + "Threshold of an 'inefficient' pauses (as % of cum efficiency.") \ + \ + product(intx, G1RSPopLimit, 32768, \ + "Limit that defines popularity. Should go away! 
XXX") \ + \ + develop(bool, G1RSCountHisto, false, \ + "If true, print a histogram of RS occupancies after each pause") \ + \ + product(intx, G1ObjPopLimit, 256, \ + "Limit that defines popularity for an object.") \ + \ + product(bool, G1TraceFileOverwrite, false, \ + "Allow the trace file to be overwritten") \ + \ + develop(intx, G1PrintRegionLivenessInfo, 0, \ + "When > 0, print the occupancies of the best and worst" \ + "regions.") \ + \ + develop(bool, G1TracePopularity, false, \ + "When true, provide detailed tracing of popularity.") \ + \ + product(bool, G1SummarizePopularity, false, \ + "When true, provide end-of-run-summarization of popularity.") \ + \ + product(intx, G1NumPopularRegions, 1, \ + "Number of regions reserved to hold popular objects. " \ + "Should go away later.") \ + \ + develop(bool, G1PrintParCleanupStats, false, \ + "When true, print extra stats about parallel cleanup.") \ + \ + product(bool, G1DoAgeCohortChecks, false, \ + "When true, check well-formedness of age cohort structures.") \ + \ + develop(bool, G1DisablePreBarrier, false, \ + "Disable generation of pre-barrier (i.e., marking barrier) ") \ + \ + develop(bool, G1DisablePostBarrier, false, \ + "Disable generation of post-barrier (i.e., RS barrier) ") \ + \ + product(intx, G1DirtyCardQueueMax, 30, \ + "Maximum number of completed RS buffers before mutator threads " \ + "start processing them.") \ + \ + develop(intx, G1ConcRSLogCacheSize, 10, \ + "Log base 2 of the length of conc RS hot-card cache.") \ + \ + product(bool, G1ConcRSCountTraversals, false, \ + "If true, gather data about the number of times CR traverses " \ + "cards ") \ + \ + product(intx, G1ConcRSHotCardLimit, 4, \ + "The threshold that defines (>=) a hot card.") \ + \ + develop(bool, G1PrintOopAppls, false, \ + "When true, print applications of closures to external locs.") \ + \ + product(intx, G1LogRSRegionEntries, 7, \ + "Log_2 of max number of regions for which we keep bitmaps.") \ + \ + develop(bool, G1RecordHRRSOops, false, \ + "When true, record recent calls to rem set operations.") \ + \ + develop(bool, G1RecordHRRSEvents, false, \ + "When true, record recent calls to rem set operations.") \ + \ + develop(intx, G1MaxVerifyFailures, -1, \ + "The maximum number of verification failrues to print. 
" \ + "-1 means print all.") \ + \ + develop(bool, G1ScrubRemSets, true, \ + "When true, do RS scrubbing after cleanup.") \ + \ + develop(bool, G1RSScrubVerbose, false, \ + "When true, do RS scrubbing with verbose output.") \ + \ + develop(bool, G1YoungSurvRateVerbose, false, \ + "print out the survival rate of young regions according to age.") \ + \ + develop(intx, G1YoungSurvRateNumRegionsSummary, 0, \ + "the number of regions for which we'll print a surv rate " \ + "summary.") \ + \ + product(bool, G1UseScanOnlyPrefix, false, \ + "It determines whether the system will calculate an optimum " \ + "scan-only set.") \ + \ + product(intx, G1MinReservePerc, 10, \ + "It determines the minimum reserve we should have in the heap " \ + "to minimize the probability of promotion failure.") \ + \ + product(bool, G1TraceRegions, false, \ + "If set G1 will print information on which regions are being " \ + "allocated and which are reclaimed.") \ + \ + develop(bool, G1HRRSUseSparseTable, true, \ + "When true, use sparse table to save space.") \ + \ + develop(bool, G1HRRSFlushLogBuffersOnVerify, false, \ + "Forces flushing of log buffers before verification.") \ + \ + product(intx, G1MaxSurvivorRegions, 0, \ + "The maximum number of survivor regions") + +G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,64 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// The following OopClosure types get specialized versions of +// "oop_oop_iterate" that invoke the closures' do_oop methods +// non-virtually, using a mechanism defined in this file. Extend these +// macros in the obvious way to add specializations for new closures. + +// Forward declarations. 
+enum G1Barrier { + G1BarrierNone, G1BarrierRS, G1BarrierEvac +}; + +template +class G1ParCopyClosure; +class G1ParScanClosure; + +typedef G1ParCopyClosure G1ParScanHeapEvacClosure; + +class FilterIntoCSClosure; +class FilterOutOfRegionClosure; +class FilterInHeapRegionAndIntoCSClosure; +class FilterAndMarkInHeapRegionAndIntoCSClosure; +class G1ScanAndBalanceClosure; + +#ifdef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES +#error "FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES already defined." +#endif + +#define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \ + f(G1ParScanHeapEvacClosure,_nv) \ + f(G1ParScanClosure,_nv) \ + f(FilterIntoCSClosure,_nv) \ + f(FilterOutOfRegionClosure,_nv) \ + f(FilterInHeapRegionAndIntoCSClosure,_nv) \ + f(FilterAndMarkInHeapRegionAndIntoCSClosure,_nv) \ + f(G1ScanAndBalanceClosure,_nv) + +#ifdef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES +#error "FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES already defined." +#endif + +#define FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(f) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/heapRegion.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,874 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_heapRegion.cpp.incl" + +HeapRegionDCTOC::HeapRegionDCTOC(G1CollectedHeap* g1, + HeapRegion* hr, OopClosure* cl, + CardTableModRefBS::PrecisionStyle precision, + FilterKind fk) : + ContiguousSpaceDCTOC(hr, cl, precision, NULL), + _hr(hr), _fk(fk), _g1(g1) +{} + +FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r, + OopClosure* oc) : + _r_bottom(r->bottom()), _r_end(r->end()), + _oc(oc), _out_of_region(0) +{} + +class VerifyLiveClosure: public OopClosure { + G1CollectedHeap* _g1h; + CardTableModRefBS* _bs; + oop _containing_obj; + bool _failures; + int _n_failures; +public: + VerifyLiveClosure(G1CollectedHeap* g1h) : + _g1h(g1h), _bs(NULL), _containing_obj(NULL), + _failures(false), _n_failures(0) + { + BarrierSet* bs = _g1h->barrier_set(); + if (bs->is_a(BarrierSet::CardTableModRef)) + _bs = (CardTableModRefBS*)bs; + } + + void set_containing_obj(oop obj) { + _containing_obj = obj; + } + + bool failures() { return _failures; } + int n_failures() { return _n_failures; } + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + assert(_containing_obj != NULL, "Precondition"); + assert(!_g1h->is_obj_dead(_containing_obj), "Precondition"); + oop obj = *p; + if (obj != NULL) { + bool failed = false; + if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead(obj)) { + if (!_failures) { + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr("----------"); + } + if (!_g1h->is_in_closed_subset(obj)) { + gclog_or_tty->print_cr("Field "PTR_FORMAT + " of live obj "PTR_FORMAT + " points to obj "PTR_FORMAT + " not in the heap.", + p, (void*) _containing_obj, (void*) obj); + } else { + gclog_or_tty->print_cr("Field "PTR_FORMAT + " of live obj "PTR_FORMAT + " points to dead obj "PTR_FORMAT".", + p, (void*) _containing_obj, (void*) obj); + } + gclog_or_tty->print_cr("Live obj:"); + _containing_obj->print_on(gclog_or_tty); + gclog_or_tty->print_cr("Bad referent:"); + obj->print_on(gclog_or_tty); + gclog_or_tty->print_cr("----------"); + _failures = true; + failed = true; + _n_failures++; + } + + if (!_g1h->full_collection()) { + HeapRegion* from = _g1h->heap_region_containing(p); + HeapRegion* to = _g1h->heap_region_containing(*p); + if (from != NULL && to != NULL && + from != to && + !to->popular() && + !to->isHumongous()) { + jbyte cv_obj = *_bs->byte_for_const(_containing_obj); + jbyte cv_field = *_bs->byte_for_const(p); + const jbyte dirty = CardTableModRefBS::dirty_card_val(); + + bool is_bad = !(from->is_young() + || to->rem_set()->contains_reference(p) + || !G1HRRSFlushLogBuffersOnVerify && // buffers were not flushed + (_containing_obj->is_objArray() ? 
+ cv_field == dirty + : cv_obj == dirty || cv_field == dirty)); + if (is_bad) { + if (!_failures) { + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr("----------"); + } + gclog_or_tty->print_cr("Missing rem set entry:"); + gclog_or_tty->print_cr("Field "PTR_FORMAT + " of obj "PTR_FORMAT + ", in region %d ["PTR_FORMAT + ", "PTR_FORMAT"),", + p, (void*) _containing_obj, + from->hrs_index(), + from->bottom(), + from->end()); + _containing_obj->print_on(gclog_or_tty); + gclog_or_tty->print_cr("points to obj "PTR_FORMAT + " in region %d ["PTR_FORMAT + ", "PTR_FORMAT").", + (void*) obj, to->hrs_index(), + to->bottom(), to->end()); + obj->print_on(gclog_or_tty); + gclog_or_tty->print_cr("Obj head CTE = %d, field CTE = %d.", + cv_obj, cv_field); + gclog_or_tty->print_cr("----------"); + _failures = true; + if (!failed) _n_failures++; + } + } + } + } + } +}; + +template +HeapWord* walk_mem_region_loop(ClosureType* cl, G1CollectedHeap* g1h, + HeapRegion* hr, + HeapWord* cur, HeapWord* top) { + oop cur_oop = oop(cur); + int oop_size = cur_oop->size(); + HeapWord* next_obj = cur + oop_size; + while (next_obj < top) { + // Keep filtering the remembered set. + if (!g1h->is_obj_dead(cur_oop, hr)) { + // Bottom lies entirely below top, so we can call the + // non-memRegion version of oop_iterate below. +#ifndef PRODUCT + if (G1VerifyMarkingInEvac) { + VerifyLiveClosure vl_cl(g1h); + cur_oop->oop_iterate(&vl_cl); + } +#endif + cur_oop->oop_iterate(cl); + } + cur = next_obj; + cur_oop = oop(cur); + oop_size = cur_oop->size(); + next_obj = cur + oop_size; + } + return cur; +} + +void HeapRegionDCTOC::walk_mem_region_with_cl(MemRegion mr, + HeapWord* bottom, + HeapWord* top, + OopClosure* cl) { + G1CollectedHeap* g1h = _g1; + + int oop_size; + + OopClosure* cl2 = cl; + FilterIntoCSClosure intoCSFilt(this, g1h, cl); + FilterOutOfRegionClosure outOfRegionFilt(_hr, cl); + switch (_fk) { + case IntoCSFilterKind: cl2 = &intoCSFilt; break; + case OutOfRegionFilterKind: cl2 = &outOfRegionFilt; break; + } + + // Start filtering what we add to the remembered set. If the object is + // not considered dead, either because it is marked (in the mark bitmap) + // or it was allocated after marking finished, then we add it. Otherwise + // we can safely ignore the object. + if (!g1h->is_obj_dead(oop(bottom), _hr)) { +#ifndef PRODUCT + if (G1VerifyMarkingInEvac) { + VerifyLiveClosure vl_cl(g1h); + oop(bottom)->oop_iterate(&vl_cl, mr); + } +#endif + oop_size = oop(bottom)->oop_iterate(cl2, mr); + } else { + oop_size = oop(bottom)->size(); + } + + bottom += oop_size; + + if (bottom < top) { + // We replicate the loop below for several kinds of possible filters. + switch (_fk) { + case NoFilterKind: + bottom = walk_mem_region_loop(cl, g1h, _hr, bottom, top); + break; + case IntoCSFilterKind: { + FilterIntoCSClosure filt(this, g1h, cl); + bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top); + break; + } + case OutOfRegionFilterKind: { + FilterOutOfRegionClosure filt(_hr, cl); + bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top); + break; + } + default: + ShouldNotReachHere(); + } + + // Last object. Need to do dead-obj filtering here too. 
+ if (!g1h->is_obj_dead(oop(bottom), _hr)) { +#ifndef PRODUCT + if (G1VerifyMarkingInEvac) { + VerifyLiveClosure vl_cl(g1h); + oop(bottom)->oop_iterate(&vl_cl, mr); + } +#endif + oop(bottom)->oop_iterate(cl2, mr); + } + } +} + +void HeapRegion::reset_after_compaction() { + G1OffsetTableContigSpace::reset_after_compaction(); + // After a compaction the mark bitmap is invalid, so we must + // treat all objects as being inside the unmarked area. + zero_marked_bytes(); + init_top_at_mark_start(); +} + + + +DirtyCardToOopClosure* +HeapRegion::new_dcto_closure(OopClosure* cl, + CardTableModRefBS::PrecisionStyle precision, + HeapRegionDCTOC::FilterKind fk) { + return new HeapRegionDCTOC(G1CollectedHeap::heap(), + this, cl, precision, fk); +} + +void HeapRegion::hr_clear(bool par, bool clear_space) { + _humongous = false; + _humongous_start = false; + _humongous_start_region = NULL; + _in_collection_set = false; + _is_gc_alloc_region = false; + + // Age stuff (if parallel, this will be done separately, since it needs + // to be sequential). + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + set_young_index_in_cset(-1); + uninstall_surv_rate_group(); + set_young_type(NotYoung); + + // In case it had been the start of a humongous sequence, reset its end. + set_end(_orig_end); + + if (!par) { + // If this is parallel, this will be done later. + HeapRegionRemSet* hrrs = rem_set(); + if (hrrs != NULL) hrrs->clear(); + _claimed = 0; + } + zero_marked_bytes(); + set_sort_index(-1); + if ((uintptr_t)bottom() >= (uintptr_t)g1h->popular_object_boundary()) + set_popular(false); + + _offsets.resize(HeapRegion::GrainWords); + init_top_at_mark_start(); + if (clear_space) clear(); +} + +// +void HeapRegion::calc_gc_efficiency() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + _gc_efficiency = (double) garbage_bytes() / + g1h->predict_region_elapsed_time_ms(this, false); +} +// + +void HeapRegion::set_startsHumongous() { + _humongous_start = true; _humongous = true; + _humongous_start_region = this; + assert(end() == _orig_end, "Should be normal before alloc."); +} + +bool HeapRegion::claimHeapRegion(jint claimValue) { + jint current = _claimed; + if (current != claimValue) { + jint res = Atomic::cmpxchg(claimValue, &_claimed, current); + if (res == current) { + return true; + } + } + return false; +} + +HeapWord* HeapRegion::next_block_start_careful(HeapWord* addr) { + HeapWord* low = addr; + HeapWord* high = end(); + while (low < high) { + size_t diff = pointer_delta(high, low); + // Must add one below to bias toward the high amount. Otherwise, if + // "high" were at the desired value, and "low" were one less, we + // would not converge on "high". This is not symmetric, because + // we set "high" to a block start, which might be the right one, + // which we don't do for "low". 
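// A minimal, standalone sketch of why the midpoint above is rounded up
// (middle = low + (diff+1)/2). This is a hypothetical example, not the HotSpot
// code: the same convergence argument appears in the classic "find the last
// index whose value is <= target" search, where one bound stays a live
// candidate and must not be re-tested forever once the interval is two wide.

#include <cassert>

// Returns the largest index i with a[i] <= target; requires a[0] <= target.
static int last_at_or_below(const int* a, int n, int target) {
  int low = 0, high = n - 1;
  while (low < high) {
    // Round the midpoint up: "low" always remains a valid candidate, "high"
    // may not, so with a rounded-down midpoint the case high == low + 1
    // would re-test "low" forever and never converge.
    int middle = low + (high - low + 1) / 2;
    if (a[middle] <= target) {
      low = middle;        // middle is a valid candidate; move low up to it
    } else {
      high = middle - 1;   // middle is too large; discard it
    }
  }
  return low;
}

int main() {
  int a[] = { 2, 4, 4, 8, 16 };
  assert(last_at_or_below(a, 5, 5)  == 2);
  assert(last_at_or_below(a, 5, 2)  == 0);
  assert(last_at_or_below(a, 5, 99) == 4);
  return 0;
}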
+ HeapWord* middle = low + (diff+1)/2; + if (middle == high) return high; + HeapWord* mid_bs = block_start_careful(middle); + if (mid_bs < addr) { + low = middle; + } else { + high = mid_bs; + } + } + assert(low == high && low >= addr, "Didn't work."); + return low; +} + +void HeapRegion::set_next_on_unclean_list(HeapRegion* r) { + assert(r == NULL || r->is_on_unclean_list(), "Malformed unclean list."); + _next_in_special_set = r; +} + +void HeapRegion::set_on_unclean_list(bool b) { + _is_on_unclean_list = b; +} + +void HeapRegion::initialize(MemRegion mr, bool clear_space) { + G1OffsetTableContigSpace::initialize(mr, false); + hr_clear(false/*par*/, clear_space); +} +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list +#endif // _MSC_VER + + +HeapRegion:: +HeapRegion(G1BlockOffsetSharedArray* sharedOffsetArray, + MemRegion mr, bool is_zeroed) + : G1OffsetTableContigSpace(sharedOffsetArray, mr, is_zeroed), + _next_fk(HeapRegionDCTOC::NoFilterKind), + _hrs_index(-1), + _humongous(false), _humongous_start(false), _humongous_start_region(NULL), + _in_collection_set(false), _is_gc_alloc_region(false), + _is_on_free_list(false), _is_on_unclean_list(false), + _next_in_special_set(NULL), _orig_end(NULL), + _claimed(0), _evacuation_failed(false), + _prev_marked_bytes(0), _next_marked_bytes(0), _sort_index(-1), + _popularity(NotPopular), + _young_type(NotYoung), _next_young_region(NULL), + _young_index_in_cset(-1), _surv_rate_group(NULL), _age_index(-1), + _rem_set(NULL), _zfs(NotZeroFilled) +{ + _orig_end = mr.end(); + // Note that initialize() will set the start of the unmarked area of the + // region. + this->initialize(mr, !is_zeroed); + + _rem_set = new HeapRegionRemSet(sharedOffsetArray, this); + + assert(HeapRegionRemSet::num_par_rem_sets() > 0, "Invariant."); + // In case the region is allocated during a pause, note the top. + // We haven't done any counting on a brand new region. + _top_at_conc_mark_count = bottom(); +} + +class NextCompactionHeapRegionClosure: public HeapRegionClosure { + const HeapRegion* _target; + bool _target_seen; + HeapRegion* _last; + CompactibleSpace* _res; +public: + NextCompactionHeapRegionClosure(const HeapRegion* target) : + _target(target), _target_seen(false), _res(NULL) {} + bool doHeapRegion(HeapRegion* cur) { + if (_target_seen) { + if (!cur->isHumongous()) { + _res = cur; + return true; + } + } else if (cur == _target) { + _target_seen = true; + } + return false; + } + CompactibleSpace* result() { return _res; } +}; + +CompactibleSpace* HeapRegion::next_compaction_space() const { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + // cast away const-ness + HeapRegion* r = (HeapRegion*) this; + NextCompactionHeapRegionClosure blk(r); + g1h->heap_region_iterate_from(r, &blk); + return blk.result(); +} + +void HeapRegion::set_continuesHumongous(HeapRegion* start) { + // The order is important here. + start->add_continuingHumongousRegion(this); + _humongous = true; _humongous_start = false; + _humongous_start_region = start; +} + +void HeapRegion::add_continuingHumongousRegion(HeapRegion* cont) { + // Must join the blocks of the current H region seq with the block of the + // added region. 
+ offsets()->join_blocks(bottom(), cont->bottom()); + arrayOop obj = (arrayOop)(bottom()); + obj->set_length((int) (obj->length() + cont->capacity()/jintSize)); + set_end(cont->end()); + set_top(cont->end()); +} + +void HeapRegion::save_marks() { + set_saved_mark(); +} + +void HeapRegion::oops_in_mr_iterate(MemRegion mr, OopClosure* cl) { + HeapWord* p = mr.start(); + HeapWord* e = mr.end(); + oop obj; + while (p < e) { + obj = oop(p); + p += obj->oop_iterate(cl); + } + assert(p == e, "bad memregion: doesn't end on obj boundary"); +} + +#define HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN(OopClosureType, nv_suffix) \ +void HeapRegion::oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl) { \ + ContiguousSpace::oop_since_save_marks_iterate##nv_suffix(cl); \ +} +SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN) + + +void HeapRegion::oop_before_save_marks_iterate(OopClosure* cl) { + oops_in_mr_iterate(MemRegion(bottom(), saved_mark_word()), cl); +} + +#ifdef DEBUG +HeapWord* HeapRegion::allocate(size_t size) { + jint state = zero_fill_state(); + assert(!G1CollectedHeap::heap()->allocs_are_zero_filled() || + zero_fill_is_allocated(), + "When ZF is on, only alloc in ZF'd regions"); + return G1OffsetTableContigSpace::allocate(size); +} +#endif + +void HeapRegion::set_zero_fill_state_work(ZeroFillState zfs) { + assert(top() == bottom() || zfs == Allocated, + "Region must be empty, or we must be setting it to allocated."); + assert(ZF_mon->owned_by_self() || + Universe::heap()->is_gc_active(), + "Must hold the lock or be a full GC to modify."); + _zfs = zfs; +} + +void HeapRegion::set_zero_fill_complete() { + set_zero_fill_state_work(ZeroFilled); + if (ZF_mon->owned_by_self()) { + ZF_mon->notify_all(); + } +} + + +void HeapRegion::ensure_zero_filled() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + ensure_zero_filled_locked(); +} + +void HeapRegion::ensure_zero_filled_locked() { + assert(ZF_mon->owned_by_self(), "Precondition"); + bool should_ignore_zf = SafepointSynchronize::is_at_safepoint(); + assert(should_ignore_zf || Heap_lock->is_locked(), + "Either we're in a GC or we're allocating a region."); + switch (zero_fill_state()) { + case HeapRegion::NotZeroFilled: + set_zero_fill_in_progress(Thread::current()); + { + ZF_mon->unlock(); + Copy::fill_to_words(bottom(), capacity()/HeapWordSize); + ZF_mon->lock_without_safepoint_check(); + } + // A trap. + guarantee(zero_fill_state() == HeapRegion::ZeroFilling + && zero_filler() == Thread::current(), + "AHA! Tell Dave D if you see this..."); + set_zero_fill_complete(); + // gclog_or_tty->print_cr("Did sync ZF."); + ConcurrentZFThread::note_sync_zfs(); + break; + case HeapRegion::ZeroFilling: + if (should_ignore_zf) { + // We can "break" the lock and take over the work. + Copy::fill_to_words(bottom(), capacity()/HeapWordSize); + set_zero_fill_complete(); + ConcurrentZFThread::note_sync_zfs(); + break; + } else { + ConcurrentZFThread::wait_for_ZF_completed(this); + } + case HeapRegion::ZeroFilled: + // Nothing to do. + break; + case HeapRegion::Allocated: + guarantee(false, "Should not call on allocated regions."); + } + assert(zero_fill_state() == HeapRegion::ZeroFilled, "Post"); +} + +HeapWord* +HeapRegion::object_iterate_mem_careful(MemRegion mr, + ObjectClosure* cl) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + // We used to use "block_start_careful" here. But we're actually happy + // to update the BOT while we do this... 
+ HeapWord* cur = block_start(mr.start()); + mr = mr.intersection(used_region()); + if (mr.is_empty()) return NULL; + // Otherwise, find the obj that extends onto mr.start(). + + assert(cur <= mr.start() + && (oop(cur)->klass() == NULL || + cur + oop(cur)->size() > mr.start()), + "postcondition of block_start"); + oop obj; + while (cur < mr.end()) { + obj = oop(cur); + if (obj->klass() == NULL) { + // Ran into an unparseable point. + return cur; + } else if (!g1h->is_obj_dead(obj)) { + cl->do_object(obj); + } + if (cl->abort()) return cur; + // The check above must occur before the operation below, since an + // abort might invalidate the "size" operation. + cur += obj->size(); + } + return NULL; +} + +HeapWord* +HeapRegion:: +oops_on_card_seq_iterate_careful(MemRegion mr, + FilterOutOfRegionClosure* cl) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // If we're within a stop-world GC, then we might look at a card in a + // GC alloc region that extends onto a GC LAB, which may not be + // parseable. Stop such at the "saved_mark" of the region. + if (G1CollectedHeap::heap()->is_gc_active()) { + mr = mr.intersection(used_region_at_save_marks()); + } else { + mr = mr.intersection(used_region()); + } + if (mr.is_empty()) return NULL; + // Otherwise, find the obj that extends onto mr.start(). + + // We used to use "block_start_careful" here. But we're actually happy + // to update the BOT while we do this... + HeapWord* cur = block_start(mr.start()); + assert(cur <= mr.start(), "Postcondition"); + + while (cur <= mr.start()) { + if (oop(cur)->klass() == NULL) { + // Ran into an unparseable point. + return cur; + } + // Otherwise... + int sz = oop(cur)->size(); + if (cur + sz > mr.start()) break; + // Otherwise, go on. + cur = cur + sz; + } + oop obj; + obj = oop(cur); + // If we finish this loop... + assert(cur <= mr.start() + && obj->klass() != NULL + && cur + obj->size() > mr.start(), + "Loop postcondition"); + if (!g1h->is_obj_dead(obj)) { + obj->oop_iterate(cl, mr); + } + + HeapWord* next; + while (cur < mr.end()) { + obj = oop(cur); + if (obj->klass() == NULL) { + // Ran into an unparseable point. + return cur; + }; + // Otherwise: + next = (cur + obj->size()); + if (!g1h->is_obj_dead(obj)) { + if (next < mr.end()) { + obj->oop_iterate(cl); + } else { + // this obj spans the boundary. If it's an array, stop at the + // boundary. + if (obj->is_objArray()) { + obj->oop_iterate(cl, mr); + } else { + obj->oop_iterate(cl); + } + } + } + cur = next; + } + return NULL; +} + +void HeapRegion::print() const { print_on(gclog_or_tty); } +void HeapRegion::print_on(outputStream* st) const { + if (isHumongous()) { + if (startsHumongous()) + st->print(" HS"); + else + st->print(" HC"); + } else { + st->print(" "); + } + if (in_collection_set()) + st->print(" CS"); + else if (is_gc_alloc_region()) + st->print(" A "); + else + st->print(" "); + if (is_young()) + st->print(is_scan_only() ? " SO" : (is_survivor() ? " SU" : " Y ")); + else + st->print(" "); + if (is_empty()) + st->print(" F"); + else + st->print(" "); + st->print(" %d", _gc_time_stamp); + G1OffsetTableContigSpace::print_on(st); +} + +#define OBJ_SAMPLE_INTERVAL 0 +#define BLOCK_SAMPLE_INTERVAL 100 + +// This really ought to be commoned up into OffsetTableContigSpace somehow. +// We would need a mechanism to make that code skip dead objects. 
+ +void HeapRegion::verify(bool allow_dirty) const { + G1CollectedHeap* g1 = G1CollectedHeap::heap(); + HeapWord* p = bottom(); + HeapWord* prev_p = NULL; + int objs = 0; + int blocks = 0; + VerifyLiveClosure vl_cl(g1); + while (p < top()) { + size_t size = oop(p)->size(); + if (blocks == BLOCK_SAMPLE_INTERVAL) { + guarantee(p == block_start_const(p + (size/2)), + "check offset computation"); + blocks = 0; + } else { + blocks++; + } + if (objs == OBJ_SAMPLE_INTERVAL) { + oop obj = oop(p); + if (!g1->is_obj_dead(obj, this)) { + obj->verify(); + vl_cl.set_containing_obj(obj); + obj->oop_iterate(&vl_cl); + if (G1MaxVerifyFailures >= 0 + && vl_cl.n_failures() >= G1MaxVerifyFailures) break; + } + objs = 0; + } else { + objs++; + } + prev_p = p; + p += size; + } + HeapWord* rend = end(); + HeapWord* rtop = top(); + if (rtop < rend) { + guarantee(block_start_const(rtop + (rend - rtop) / 2) == rtop, + "check offset computation"); + } + if (vl_cl.failures()) { + gclog_or_tty->print_cr("Heap:"); + G1CollectedHeap::heap()->print(); + gclog_or_tty->print_cr(""); + } + if (G1VerifyConcMark && + G1VerifyConcMarkPrintReachable && + vl_cl.failures()) { + g1->concurrent_mark()->print_prev_bitmap_reachable(); + } + guarantee(!vl_cl.failures(), "should not have had any failures"); + guarantee(p == top(), "end of last object must match end of space"); +} + +// G1OffsetTableContigSpace code; copied from space.cpp. Hope this can go +// away eventually. + +void G1OffsetTableContigSpace::initialize(MemRegion mr, bool clear_space) { + // false ==> we'll do the clearing if there's clearing to be done. + ContiguousSpace::initialize(mr, false); + _offsets.zero_bottom_entry(); + _offsets.initialize_threshold(); + if (clear_space) clear(); +} + +void G1OffsetTableContigSpace::clear() { + ContiguousSpace::clear(); + _offsets.zero_bottom_entry(); + _offsets.initialize_threshold(); +} + +void G1OffsetTableContigSpace::set_bottom(HeapWord* new_bottom) { + Space::set_bottom(new_bottom); + _offsets.set_bottom(new_bottom); +} + +void G1OffsetTableContigSpace::set_end(HeapWord* new_end) { + Space::set_end(new_end); + _offsets.resize(new_end - bottom()); +} + +void G1OffsetTableContigSpace::print() const { + print_short(); + gclog_or_tty->print_cr(" [" INTPTR_FORMAT ", " INTPTR_FORMAT ", " + INTPTR_FORMAT ", " INTPTR_FORMAT ")", + bottom(), top(), _offsets.threshold(), end()); +} + +HeapWord* G1OffsetTableContigSpace::initialize_threshold() { + return _offsets.initialize_threshold(); +} + +HeapWord* G1OffsetTableContigSpace::cross_threshold(HeapWord* start, + HeapWord* end) { + _offsets.alloc_block(start, end); + return _offsets.threshold(); +} + +HeapWord* G1OffsetTableContigSpace::saved_mark_word() const { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + assert( _gc_time_stamp <= g1h->get_gc_time_stamp(), "invariant" ); + if (_gc_time_stamp < g1h->get_gc_time_stamp()) + return top(); + else + return ContiguousSpace::saved_mark_word(); +} + +void G1OffsetTableContigSpace::set_saved_mark() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + unsigned curr_gc_time_stamp = g1h->get_gc_time_stamp(); + + if (_gc_time_stamp < curr_gc_time_stamp) { + // The order of these is important, as another thread might be + // about to start scanning this region. If it does so after + // set_saved_mark and before _gc_time_stamp = ..., then the latter + // will be false, and it will pick up top() as the high water mark + // of region. 
If it does so after _gc_time_stamp = ..., then it + // will pick up the right saved_mark_word() as the high water mark + // of the region. Either way, the behaviour will be correct. + ContiguousSpace::set_saved_mark(); + OrderAccess::release_store_ptr((volatile intptr_t*) &_gc_time_stamp, + (intptr_t) curr_gc_time_stamp); + } +} + +G1OffsetTableContigSpace:: +G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray, + MemRegion mr, bool is_zeroed) : + _offsets(sharedOffsetArray, mr), + _par_alloc_lock(Mutex::leaf, "OffsetTableContigSpace par alloc lock", true), + _gc_time_stamp(0) +{ + _offsets.set_space(this); + initialize(mr, !is_zeroed); +} + +size_t RegionList::length() { + size_t len = 0; + HeapRegion* cur = hd(); + DEBUG_ONLY(HeapRegion* last = NULL); + while (cur != NULL) { + len++; + DEBUG_ONLY(last = cur); + cur = get_next(cur); + } + assert(last == tl(), "Invariant"); + return len; +} + +void RegionList::insert_before_head(HeapRegion* r) { + assert(well_formed(), "Inv"); + set_next(r, hd()); + _hd = r; + _sz++; + if (tl() == NULL) _tl = r; + assert(well_formed(), "Inv"); +} + +void RegionList::prepend_list(RegionList* new_list) { + assert(well_formed(), "Precondition"); + assert(new_list->well_formed(), "Precondition"); + HeapRegion* new_tl = new_list->tl(); + if (new_tl != NULL) { + set_next(new_tl, hd()); + _hd = new_list->hd(); + _sz += new_list->sz(); + if (tl() == NULL) _tl = new_list->tl(); + } else { + assert(new_list->hd() == NULL && new_list->sz() == 0, "Inv"); + } + assert(well_formed(), "Inv"); +} + +void RegionList::delete_after(HeapRegion* r) { + assert(well_formed(), "Precondition"); + HeapRegion* next = get_next(r); + assert(r != NULL, "Precondition"); + HeapRegion* next_tl = get_next(next); + set_next(r, next_tl); + dec_sz(); + if (next == tl()) { + assert(next_tl == NULL, "Inv"); + _tl = r; + } + assert(well_formed(), "Inv"); +} + +HeapRegion* RegionList::pop() { + assert(well_formed(), "Inv"); + HeapRegion* res = hd(); + if (res != NULL) { + _hd = get_next(res); + _sz--; + set_next(res, NULL); + if (sz() == 0) _tl = NULL; + } + assert(well_formed(), "Inv"); + return res; +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/heapRegion.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,924 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#ifndef SERIALGC + +// A HeapRegion is the smallest piece of a G1CollectedHeap that +// can be collected independently. + +// NOTE: Although a HeapRegion is a Space, its +// Space::initDirtyCardClosure method must not be called. +// The problem is that the existence of this method breaks +// the independence of barrier sets from remembered sets. +// The solution is to remove this method from the definition +// of a Space. + +class CompactibleSpace; +class ContiguousSpace; +class HeapRegionRemSet; +class HeapRegionRemSetIterator; +class HeapRegion; + +// A dirty card to oop closure for heap regions. It +// knows how to get the G1 heap and how to use the bitmap +// in the concurrent marker used by G1 to filter remembered +// sets. + +class HeapRegionDCTOC : public ContiguousSpaceDCTOC { +public: + // Specification of possible DirtyCardToOopClosure filtering. + enum FilterKind { + NoFilterKind, + IntoCSFilterKind, + OutOfRegionFilterKind + }; + +protected: + HeapRegion* _hr; + FilterKind _fk; + G1CollectedHeap* _g1; + + void walk_mem_region_with_cl(MemRegion mr, + HeapWord* bottom, HeapWord* top, + OopClosure* cl); + + // We don't specialize this for FilteringClosure; filtering is handled by + // the "FilterKind" mechanism. But we provide this to avoid a compiler + // warning. + void walk_mem_region_with_cl(MemRegion mr, + HeapWord* bottom, HeapWord* top, + FilteringClosure* cl) { + HeapRegionDCTOC::walk_mem_region_with_cl(mr, bottom, top, + (OopClosure*)cl); + } + + // Get the actual top of the area on which the closure will + // operate, given where the top is assumed to be (the end of the + // memory region passed to do_MemRegion) and where the object + // at the top is assumed to start. For example, an object may + // start at the top but actually extend past the assumed top, + // in which case the top becomes the end of the object. + HeapWord* get_actual_top(HeapWord* top, HeapWord* top_obj) { + return ContiguousSpaceDCTOC::get_actual_top(top, top_obj); + } + + // Walk the given memory region from bottom to (actual) top + // looking for objects and applying the oop closure (_cl) to + // them. The base implementation of this treats the area as + // blocks, where a block may or may not be an object. Sub- + // classes should override this to provide more accurate + // or possibly more efficient walking. + void walk_mem_region(MemRegion mr, HeapWord* bottom, HeapWord* top) { + Filtering_DCTOC::walk_mem_region(mr, bottom, top); + } + +public: + HeapRegionDCTOC(G1CollectedHeap* g1, + HeapRegion* hr, OopClosure* cl, + CardTableModRefBS::PrecisionStyle precision, + FilterKind fk); +}; + + +// The complicating factor is that BlockOffsetTable diverged +// significantly, and we need functionality that is only in the G1 version. +// So I copied that code, which led to an alternate G1 version of +// OffsetTableContigSpace. If the two versions of BlockOffsetTable could +// be reconciled, then G1OffsetTableContigSpace could go away. + +// The idea behind time stamps is the following. Doing a save_marks on +// all regions at every GC pause is time consuming (if I remember +// well, 10ms or so). So, we would like to do that only for regions +// that are GC alloc regions. To achieve this, we use time +// stamps. For every evacuation pause, G1CollectedHeap generates a +// unique time stamp (essentially a counter that gets +// incremented). 
Every time we want to call save_marks on a region, +// we set the saved_mark_word to top and also copy the current GC +// time stamp to the time stamp field of the space. Reading the +// saved_mark_word involves checking the time stamp of the +// region. If it is the same as the current GC time stamp, then we +// can safely read the saved_mark_word field, as it is valid. If the +// time stamp of the region is not the same as the current GC time +// stamp, then we instead read top, as the saved_mark_word field is +// invalid. Time stamps (on the regions and also on the +// G1CollectedHeap) are reset at every cleanup (we iterate over +// the regions anyway) and at the end of a Full GC. The current scheme +// that uses sequential unsigned ints will fail only if we have 4b +// evacuation pauses between two cleanups, which is _highly_ unlikely. + +class G1OffsetTableContigSpace: public ContiguousSpace { + friend class VMStructs; + protected: + G1BlockOffsetArrayContigSpace _offsets; + Mutex _par_alloc_lock; + volatile unsigned _gc_time_stamp; + + public: + // Constructor. If "is_zeroed" is true, the MemRegion "mr" may be + // assumed to contain zeros. + G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray, + MemRegion mr, bool is_zeroed = false); + + void set_bottom(HeapWord* value); + void set_end(HeapWord* value); + + virtual HeapWord* saved_mark_word() const; + virtual void set_saved_mark(); + void reset_gc_time_stamp() { _gc_time_stamp = 0; } + + virtual void initialize(MemRegion mr, bool clear_space); + virtual void clear(); + + HeapWord* block_start(const void* p); + HeapWord* block_start_const(const void* p) const; + + // Add offset table update. + virtual HeapWord* allocate(size_t word_size); + HeapWord* par_allocate(size_t word_size); + + // MarkSweep support phase3 + virtual HeapWord* initialize_threshold(); + virtual HeapWord* cross_threshold(HeapWord* start, HeapWord* end); + + virtual void print() const; +}; + +class HeapRegion: public G1OffsetTableContigSpace { + friend class VMStructs; + private: + + // The next filter kind that should be used for a "new_dcto_cl" call with + // the "traditional" signature. + HeapRegionDCTOC::FilterKind _next_fk; + + // Requires that the region "mr" be dense with objects, and begin and end + // with an object. + void oops_in_mr_iterate(MemRegion mr, OopClosure* cl); + + // The remembered set for this region. + // (Might want to make this "inline" later, to avoid some alloc failure + // issues.) + HeapRegionRemSet* _rem_set; + + G1BlockOffsetArrayContigSpace* offsets() { return &_offsets; } + + protected: + // If this region is a member of a HeapRegionSeq, the index in that + // sequence, otherwise -1. + int _hrs_index; + + bool _humongous; // starts or continues a humongous object + bool _humongous_start; // starts a humongous object + // For a humongous region, region in which it starts. + HeapRegion* _humongous_start_region; + // For the start region of a humongous sequence, it's original end(). + HeapWord* _orig_end; + + // True iff the region is in current collection_set. + bool _in_collection_set; + + // True iff the region is on the unclean list, waiting to be zero filled. + bool _is_on_unclean_list; + + // True iff the region is on the free list, ready for allocation. + bool _is_on_free_list; + + // Is this or has it been an allocation region in the current collection + // pause. + bool _is_gc_alloc_region; + + // True iff an attempt to evacuate an object in the region failed. 
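// A minimal, standalone sketch of the time-stamp scheme described above, with
// hypothetical names (StampedRegion, g_gc_time_stamp) rather than the HotSpot
// classes: a region only trusts its saved mark if its stamp matches the
// current global GC time stamp; otherwise it falls back to top.

#include <cassert>

static unsigned g_gc_time_stamp = 0;          // bumped once per evacuation pause

struct StampedRegion {
  unsigned gc_time_stamp;
  char*    top;
  char*    saved_mark;

  void save_marks() {                         // done only for GC alloc regions
    saved_mark    = top;
    gc_time_stamp = g_gc_time_stamp;
  }
  char* saved_mark_word() const {
    // A stamp from an older pause means the saved mark is not valid for the
    // current pause, so reading top is the conservative, correct answer.
    return (gc_time_stamp == g_gc_time_stamp) ? saved_mark : top;
  }
};

int main() {
  char storage[64];
  StampedRegion r = { 0, storage + 8, storage };
  g_gc_time_stamp++;                          // a new pause begins
  assert(r.saved_mark_word() == r.top);       // stale stamp: fall back to top
  r.save_marks();
  r.top = storage + 32;                       // allocation during the pause
  assert(r.saved_mark_word() == storage + 8); // matching stamp: saved mark is valid
  return 0;
}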
+ bool _evacuation_failed; + + // A heap region may be a member one of a number of special subsets, each + // represented as linked lists through the field below. Currently, these + // sets include: + // The collection set. + // The set of allocation regions used in a collection pause. + // Spaces that may contain gray objects. + HeapRegion* _next_in_special_set; + + // next region in the young "generation" region set + HeapRegion* _next_young_region; + + // For parallel heapRegion traversal. + jint _claimed; + + // We use concurrent marking to determine the amount of live data + // in each heap region. + size_t _prev_marked_bytes; // Bytes known to be live via last completed marking. + size_t _next_marked_bytes; // Bytes known to be live via in-progress marking. + + // See "sort_index" method. -1 means is not in the array. + int _sort_index; + + // Means it has (or at least had) a very large RS, and should not be + // considered for membership in a collection set. + enum PopularityState { + NotPopular, + PopularPending, + Popular + }; + PopularityState _popularity; + + // + double _gc_efficiency; + // + + enum YoungType { + NotYoung, // a region is not young + ScanOnly, // a region is young and scan-only + Young, // a region is young + Survivor // a region is young and it contains + // survivor + }; + + YoungType _young_type; + int _young_index_in_cset; + SurvRateGroup* _surv_rate_group; + int _age_index; + + // The start of the unmarked area. The unmarked area extends from this + // word until the top and/or end of the region, and is the part + // of the region for which no marking was done, i.e. objects may + // have been allocated in this part since the last mark phase. + // "prev" is the top at the start of the last completed marking. + // "next" is the top at the start of the in-progress marking (if any.) + HeapWord* _prev_top_at_mark_start; + HeapWord* _next_top_at_mark_start; + // If a collection pause is in progress, this is the top at the start + // of that pause. + + // We've counted the marked bytes of objects below here. + HeapWord* _top_at_conc_mark_count; + + void init_top_at_mark_start() { + assert(_prev_marked_bytes == 0 && + _next_marked_bytes == 0, + "Must be called after zero_marked_bytes."); + HeapWord* bot = bottom(); + _prev_top_at_mark_start = bot; + _next_top_at_mark_start = bot; + _top_at_conc_mark_count = bot; + } + + jint _zfs; // A member of ZeroFillState. Protected by ZF_lock. + Thread* _zero_filler; // If _zfs is ZeroFilling, the thread that (last) + // made it so. + + void set_young_type(YoungType new_type) { + //assert(_young_type != new_type, "setting the same type" ); + // TODO: add more assertions here + _young_type = new_type; + } + + public: + // If "is_zeroed" is "true", the region "mr" can be assumed to contain zeros. + HeapRegion(G1BlockOffsetSharedArray* sharedOffsetArray, + MemRegion mr, bool is_zeroed); + + enum SomePublicConstants { + // HeapRegions are GrainBytes-aligned + // and have sizes that are multiples of GrainBytes. + LogOfHRGrainBytes = 20, + LogOfHRGrainWords = LogOfHRGrainBytes - LogHeapWordSize, + GrainBytes = 1 << LogOfHRGrainBytes, + GrainWords = 1 <= marked_bytes(), + "Can't mark more than we have."); + return used_at_mark_start_bytes - marked_bytes(); + } + + // An upper bound on the number of live bytes in the region. 
+ size_t max_live_bytes() { return used() - garbage_bytes(); } + + void add_to_marked_bytes(size_t incr_bytes) { + _next_marked_bytes = _next_marked_bytes + incr_bytes; + guarantee( _next_marked_bytes <= used(), "invariant" ); + } + + void zero_marked_bytes() { + _prev_marked_bytes = _next_marked_bytes = 0; + } + + bool isHumongous() const { return _humongous; } + bool startsHumongous() const { return _humongous_start; } + bool continuesHumongous() const { return _humongous && ! _humongous_start; } + // For a humongous region, region in which it starts. + HeapRegion* humongous_start_region() const { + return _humongous_start_region; + } + + // Causes the current region to represent a humongous object spanning "n" + // regions. + virtual void set_startsHumongous(); + + // The regions that continue a humongous sequence should be added using + // this method, in increasing address order. + void set_continuesHumongous(HeapRegion* start); + + void add_continuingHumongousRegion(HeapRegion* cont); + + // If the region has a remembered set, return a pointer to it. + HeapRegionRemSet* rem_set() const { + return _rem_set; + } + + // True iff the region is in current collection_set. + bool in_collection_set() const { + return _in_collection_set; + } + void set_in_collection_set(bool b) { + _in_collection_set = b; + } + HeapRegion* next_in_collection_set() { + assert(in_collection_set(), "should only invoke on member of CS."); + assert(_next_in_special_set == NULL || + _next_in_special_set->in_collection_set(), + "Malformed CS."); + return _next_in_special_set; + } + void set_next_in_collection_set(HeapRegion* r) { + assert(in_collection_set(), "should only invoke on member of CS."); + assert(r == NULL || r->in_collection_set(), "Malformed CS."); + _next_in_special_set = r; + } + + // True iff it is or has been an allocation region in the current + // collection pause. 
+ bool is_gc_alloc_region() const { + return _is_gc_alloc_region; + } + void set_is_gc_alloc_region(bool b) { + _is_gc_alloc_region = b; + } + HeapRegion* next_gc_alloc_region() { + assert(is_gc_alloc_region(), "should only invoke on member of CS."); + assert(_next_in_special_set == NULL || + _next_in_special_set->is_gc_alloc_region(), + "Malformed CS."); + return _next_in_special_set; + } + void set_next_gc_alloc_region(HeapRegion* r) { + assert(is_gc_alloc_region(), "should only invoke on member of CS."); + assert(r == NULL || r->is_gc_alloc_region(), "Malformed CS."); + _next_in_special_set = r; + } + + bool is_reserved() { + return popular(); + } + + bool is_on_free_list() { + return _is_on_free_list; + } + + void set_on_free_list(bool b) { + _is_on_free_list = b; + } + + HeapRegion* next_from_free_list() { + assert(is_on_free_list(), + "Should only invoke on free space."); + assert(_next_in_special_set == NULL || + _next_in_special_set->is_on_free_list(), + "Malformed Free List."); + return _next_in_special_set; + } + + void set_next_on_free_list(HeapRegion* r) { + assert(r == NULL || r->is_on_free_list(), "Malformed free list."); + _next_in_special_set = r; + } + + bool is_on_unclean_list() { + return _is_on_unclean_list; + } + + void set_on_unclean_list(bool b); + + HeapRegion* next_from_unclean_list() { + assert(is_on_unclean_list(), + "Should only invoke on unclean space."); + assert(_next_in_special_set == NULL || + _next_in_special_set->is_on_unclean_list(), + "Malformed unclean List."); + return _next_in_special_set; + } + + void set_next_on_unclean_list(HeapRegion* r); + + HeapRegion* get_next_young_region() { return _next_young_region; } + void set_next_young_region(HeapRegion* hr) { + _next_young_region = hr; + } + + // Allows logical separation between objects allocated before and after. + void save_marks(); + + // Reset HR stuff to default values. + void hr_clear(bool par, bool clear_space); + + void initialize(MemRegion mr, bool clear_space); + + // Ensure that "this" is zero-filled. + void ensure_zero_filled(); + // This one requires that the calling thread holds ZF_mon. + void ensure_zero_filled_locked(); + + // Get the start of the unmarked area in this region. + HeapWord* prev_top_at_mark_start() const { return _prev_top_at_mark_start; } + HeapWord* next_top_at_mark_start() const { return _next_top_at_mark_start; } + + // Apply "cl->do_oop" to (the addresses of) all reference fields in objects + // allocated in the current region before the last call to "save_mark". + void oop_before_save_marks_iterate(OopClosure* cl); + + // This call determines the "filter kind" argument that will be used for + // the next call to "new_dcto_cl" on this region with the "traditional" + // signature (i.e., the call below.) The default, in the absence of a + // preceding call to this method, is "NoFilterKind", and a call to this + // method is necessary for each such call, or else it reverts to the + // default. + // (This is really ugly, but all other methods I could think of changed a + // lot of main-line code for G1.) 
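// A minimal, standalone sketch of the "stash an argument for the next call"
// pattern described above, with hypothetical names (ClosureFactory, create)
// rather than the HotSpot interface: a one-shot setting is consumed by the
// next call and then reverts to the default, so callers that never set it
// keep the old behaviour.

#include <cassert>

enum ToyFilter { NoFilter, IntoCSFilter, OutOfRegionFilter };

class ClosureFactory {
  ToyFilter _next_filter;
public:
  ClosureFactory() : _next_filter(NoFilter) {}
  void set_next_filter(ToyFilter f) { _next_filter = f; }
  ToyFilter create() {                  // stands in for the next "new_dcto_cl" call
    ToyFilter used = _next_filter;
    _next_filter = NoFilter;            // one-shot: revert to the default
    return used;
  }
};

int main() {
  ClosureFactory f;
  assert(f.create() == NoFilter);       // default when nothing was stashed
  f.set_next_filter(IntoCSFilter);
  assert(f.create() == IntoCSFilter);   // the stashed value is used exactly once
  assert(f.create() == NoFilter);       // and the default is restored afterwards
  return 0;
}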
+ void set_next_filter_kind(HeapRegionDCTOC::FilterKind nfk) { + _next_fk = nfk; + } + + DirtyCardToOopClosure* + new_dcto_closure(OopClosure* cl, + CardTableModRefBS::PrecisionStyle precision, + HeapRegionDCTOC::FilterKind fk); + +#if WHASSUP + DirtyCardToOopClosure* + new_dcto_closure(OopClosure* cl, + CardTableModRefBS::PrecisionStyle precision, + HeapWord* boundary) { + assert(boundary == NULL, "This arg doesn't make sense here."); + DirtyCardToOopClosure* res = new_dcto_closure(cl, precision, _next_fk); + _next_fk = HeapRegionDCTOC::NoFilterKind; + return res; + } +#endif + + // + // Note the start or end of marking. This tells the heap region + // that the collector is about to start or has finished (concurrently) + // marking the heap. + // + + // Note the start of a marking phase. Record the + // start of the unmarked area of the region here. + void note_start_of_marking(bool during_initial_mark) { + init_top_at_conc_mark_count(); + _next_marked_bytes = 0; + if (during_initial_mark && is_young() && !is_survivor()) + _next_top_at_mark_start = bottom(); + else + _next_top_at_mark_start = top(); + } + + // Note the end of a marking phase. Install the start of + // the unmarked area that was captured at start of marking. + void note_end_of_marking() { + _prev_top_at_mark_start = _next_top_at_mark_start; + _prev_marked_bytes = _next_marked_bytes; + _next_marked_bytes = 0; + + guarantee(_prev_marked_bytes <= + (size_t) (prev_top_at_mark_start() - bottom()) * HeapWordSize, + "invariant"); + } + + // After an evacuation, we need to update _next_top_at_mark_start + // to be the current top. Note this is only valid if we have only + // ever evacuated into this region. If we evacuate, allocate, and + // then evacuate we are in deep doodoo. + void note_end_of_copying() { + assert(top() >= _next_top_at_mark_start, + "Increase only"); + _next_top_at_mark_start = top(); + } + + // Returns "false" iff no object in the region was allocated when the + // last mark phase ended. + bool is_marked() { return _prev_top_at_mark_start != bottom(); } + + // If "is_marked()" is true, then this is the index of the region in + // an array constructed at the end of marking of the regions in a + // "desirability" order. 
+ int sort_index() { + return _sort_index; + } + void set_sort_index(int i) { + _sort_index = i; + } + + void init_top_at_conc_mark_count() { + _top_at_conc_mark_count = bottom(); + } + + void set_top_at_conc_mark_count(HeapWord *cur) { + assert(bottom() <= cur && cur <= end(), "Sanity."); + _top_at_conc_mark_count = cur; + } + + HeapWord* top_at_conc_mark_count() { + return _top_at_conc_mark_count; + } + + void reset_during_compaction() { + guarantee( isHumongous() && startsHumongous(), + "should only be called for humongous regions"); + + zero_marked_bytes(); + init_top_at_mark_start(); + } + + bool popular() { return _popularity == Popular; } + void set_popular(bool b) { + if (b) { + _popularity = Popular; + } else { + _popularity = NotPopular; + } + } + bool popular_pending() { return _popularity == PopularPending; } + void set_popular_pending(bool b) { + if (b) { + _popularity = PopularPending; + } else { + _popularity = NotPopular; + } + } + + // + void calc_gc_efficiency(void); + double gc_efficiency() { return _gc_efficiency;} + // + + bool is_young() const { return _young_type != NotYoung; } + bool is_scan_only() const { return _young_type == ScanOnly; } + bool is_survivor() const { return _young_type == Survivor; } + + int young_index_in_cset() const { return _young_index_in_cset; } + void set_young_index_in_cset(int index) { + assert( (index == -1) || is_young(), "pre-condition" ); + _young_index_in_cset = index; + } + + int age_in_surv_rate_group() { + assert( _surv_rate_group != NULL, "pre-condition" ); + assert( _age_index > -1, "pre-condition" ); + return _surv_rate_group->age_in_group(_age_index); + } + + void recalculate_age_in_surv_rate_group() { + assert( _surv_rate_group != NULL, "pre-condition" ); + assert( _age_index > -1, "pre-condition" ); + _age_index = _surv_rate_group->recalculate_age_index(_age_index); + } + + void record_surv_words_in_group(size_t words_survived) { + assert( _surv_rate_group != NULL, "pre-condition" ); + assert( _age_index > -1, "pre-condition" ); + int age_in_group = age_in_surv_rate_group(); + _surv_rate_group->record_surviving_words(age_in_group, words_survived); + } + + int age_in_surv_rate_group_cond() { + if (_surv_rate_group != NULL) + return age_in_surv_rate_group(); + else + return -1; + } + + SurvRateGroup* surv_rate_group() { + return _surv_rate_group; + } + + void install_surv_rate_group(SurvRateGroup* surv_rate_group) { + assert( surv_rate_group != NULL, "pre-condition" ); + assert( _surv_rate_group == NULL, "pre-condition" ); + assert( is_young(), "pre-condition" ); + + _surv_rate_group = surv_rate_group; + _age_index = surv_rate_group->next_age_index(); + } + + void uninstall_surv_rate_group() { + if (_surv_rate_group != NULL) { + assert( _age_index > -1, "pre-condition" ); + assert( is_young(), "pre-condition" ); + + _surv_rate_group = NULL; + _age_index = -1; + } else { + assert( _age_index == -1, "pre-condition" ); + } + } + + void set_young() { set_young_type(Young); } + + void set_scan_only() { set_young_type(ScanOnly); } + + void set_survivor() { set_young_type(Survivor); } + + void set_not_young() { set_young_type(NotYoung); } + + // Determine if an object has been allocated since the last + // mark performed by the collector. This returns true iff the object + // is within the unmarked area of the region. 
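// A minimal, standalone sketch of the address test described above, with a
// hypothetical name (MarkedRegion) rather than the HotSpot class: allocation
// in a region is contiguous and only ever grows top, so anything at or above
// the top recorded when marking started must have been allocated after that mark.

#include <cassert>

struct MarkedRegion {
  char* bottom;
  char* top_at_mark_start;   // top when the last marking began
  char* top;                 // current allocation top

  bool allocated_since_mark(const void* obj) const {
    return (const char*)obj >= top_at_mark_start;
  }
};

int main() {
  char storage[64];
  MarkedRegion r = { storage, storage + 16, storage + 48 };
  assert(!r.allocated_since_mark(storage + 8));   // below the recorded top: old object
  assert( r.allocated_since_mark(storage + 32));  // at or above it: allocated since the mark
  return 0;
}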
+ bool obj_allocated_since_prev_marking(oop obj) const { + return (HeapWord *) obj >= prev_top_at_mark_start(); + } + bool obj_allocated_since_next_marking(oop obj) const { + return (HeapWord *) obj >= next_top_at_mark_start(); + } + + // For parallel heapRegion traversal. + bool claimHeapRegion(int claimValue); + jint claim_value() { return _claimed; } + // Use this carefully: only when you're sure no one is claiming... + void set_claim_value(int claimValue) { _claimed = claimValue; } + + // Returns the "evacuation_failed" property of the region. + bool evacuation_failed() { return _evacuation_failed; } + + // Sets the "evacuation_failed" property of the region. + void set_evacuation_failed(bool b) { + _evacuation_failed = b; + + if (b) { + init_top_at_conc_mark_count(); + _next_marked_bytes = 0; + } + } + + // Requires that "mr" be entirely within the region. + // Apply "cl->do_object" to all objects that intersect with "mr". + // If the iteration encounters an unparseable portion of the region, + // or if "cl->abort()" is true after a closure application, + // terminate the iteration and return the address of the start of the + // subregion that isn't done. (The two can be distinguished by querying + // "cl->abort()".) Return of "NULL" indicates that the iteration + // completed. + HeapWord* + object_iterate_mem_careful(MemRegion mr, ObjectClosure* cl); + + HeapWord* + oops_on_card_seq_iterate_careful(MemRegion mr, + FilterOutOfRegionClosure* cl); + + // The region "mr" is entirely in "this", and starts and ends at block + // boundaries. The caller declares that all the contained blocks are + // coalesced into one. + void declare_filled_region_to_BOT(MemRegion mr) { + _offsets.single_block(mr.start(), mr.end()); + } + + // A version of block start that is guaranteed to find *some* block + // boundary at or before "p", but does not object iteration, and may + // therefore be used safely when the heap is unparseable. + HeapWord* block_start_careful(const void* p) const { + return _offsets.block_start_careful(p); + } + + // Requires that "addr" is within the region. Returns the start of the + // first ("careful") block that starts at or after "addr", or else the + // "end" of the region if there is no such block. + HeapWord* next_block_start_careful(HeapWord* addr); + + // Returns the zero-fill-state of the current region. + ZeroFillState zero_fill_state() { return (ZeroFillState)_zfs; } + bool zero_fill_is_allocated() { return _zfs == Allocated; } + Thread* zero_filler() { return _zero_filler; } + + // Indicate that the contents of the region are unknown, and therefore + // might require zero-filling. + void set_zero_fill_needed() { + set_zero_fill_state_work(NotZeroFilled); + } + void set_zero_fill_in_progress(Thread* t) { + set_zero_fill_state_work(ZeroFilling); + _zero_filler = t; + } + void set_zero_fill_complete(); + void set_zero_fill_allocated() { + set_zero_fill_state_work(Allocated); + } + + void set_zero_fill_state_work(ZeroFillState zfs); + + // This is called when a full collection shrinks the heap. + // We want to set the heap region to a value which says + // it is no longer part of the heap. For now, we'll let "NotZF" fill + // that role. 
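// A minimal, standalone sketch of the zero-fill lifecycle described above,
// with a hypothetical name (ZfRegion) rather than the HotSpot class: a region
// moves from NotZeroFilled through ZeroFilling to ZeroFilled and finally
// Allocated, and a heap shrink drops it back to NotZeroFilled ("contents unknown").

#include <cassert>
#include <cstring>

enum ToyZeroFillState { NotZeroFilled, ZeroFilling, ZeroFilled, Allocated };

struct ZfRegion {
  ToyZeroFillState zfs;
  char data[32];

  void ensure_zero_filled() {
    if (zfs == NotZeroFilled) {
      zfs = ZeroFilling;
      std::memset(data, 0, sizeof(data));   // the actual fill work
      zfs = ZeroFilled;
    }
  }
  void allocate_in()     { assert(zfs == ZeroFilled); zfs = Allocated; }
  void reset_zero_fill() { zfs = NotZeroFilled; }   // e.g. after a full-GC heap shrink
};

int main() {
  ZfRegion r;
  r.zfs = NotZeroFilled;
  r.ensure_zero_filled();
  r.allocate_in();
  r.reset_zero_fill();                      // contents are "unknown" again
  assert(r.zfs == NotZeroFilled);
  return 0;
}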
+ void reset_zero_fill() { + set_zero_fill_state_work(NotZeroFilled); + _zero_filler = NULL; + } + +#define HeapRegion_OOP_SINCE_SAVE_MARKS_DECL(OopClosureType, nv_suffix) \ + virtual void oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl); + SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DECL) + + CompactibleSpace* next_compaction_space() const; + + virtual void reset_after_compaction(); + + void print() const; + void print_on(outputStream* st) const; + + // Override + virtual void verify(bool allow_dirty) const; + +#ifdef DEBUG + HeapWord* allocate(size_t size); +#endif +}; + +// HeapRegionClosure is used for iterating over regions. +// Terminates the iteration when the "doHeapRegion" method returns "true". +class HeapRegionClosure : public StackObj { + friend class HeapRegionSeq; + friend class G1CollectedHeap; + + bool _complete; + void incomplete() { _complete = false; } + + public: + HeapRegionClosure(): _complete(true) {} + + // Typically called on each region until it returns true. + virtual bool doHeapRegion(HeapRegion* r) = 0; + + // True after iteration if the closure was applied to all heap regions + // and returned "false" in all cases. + bool complete() { return _complete; } +}; + +// A linked lists of heap regions. It leaves the "next" field +// unspecified; that's up to subtypes. +class RegionList { +protected: + virtual HeapRegion* get_next(HeapRegion* chr) = 0; + virtual void set_next(HeapRegion* chr, + HeapRegion* new_next) = 0; + + HeapRegion* _hd; + HeapRegion* _tl; + size_t _sz; + + // Protected constructor because this type is only meaningful + // when the _get/_set next functions are defined. + RegionList() : _hd(NULL), _tl(NULL), _sz(0) {} +public: + void reset() { + _hd = NULL; + _tl = NULL; + _sz = 0; + } + HeapRegion* hd() { return _hd; } + HeapRegion* tl() { return _tl; } + size_t sz() { return _sz; } + size_t length(); + + bool well_formed() { + return + ((hd() == NULL && tl() == NULL && sz() == 0) + || (hd() != NULL && tl() != NULL && sz() > 0)) + && (sz() == length()); + } + virtual void insert_before_head(HeapRegion* r); + void prepend_list(RegionList* new_list); + virtual HeapRegion* pop(); + void dec_sz() { _sz--; } + // Requires that "r" is an element of the list, and is not the tail. + void delete_after(HeapRegion* r); +}; + +class EmptyNonHRegionList: public RegionList { +protected: + // Protected constructor because this type is only meaningful + // when the _get/_set next functions are defined. 
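// A minimal, standalone sketch of an intrusive list in the style of RegionList
// above, with hypothetical names (ToyNode, SpecialList) rather than the HotSpot
// classes: the list stores no nodes of its own, and subclasses decide which
// field of the element serves as the "next" link.

#include <cassert>
#include <cstddef>

struct ToyNode { ToyNode* next_special; ToyNode() : next_special(NULL) {} };

class IntrusiveList {
protected:
  virtual ToyNode* get_next(ToyNode* n) = 0;
  virtual void     set_next(ToyNode* n, ToyNode* new_next) = 0;
  ToyNode* _hd;
  size_t   _sz;
  IntrusiveList() : _hd(NULL), _sz(0) {}
public:
  virtual ~IntrusiveList() {}
  void insert_before_head(ToyNode* n) { set_next(n, _hd); _hd = n; _sz++; }
  ToyNode* pop() {
    ToyNode* res = _hd;
    if (res != NULL) { _hd = get_next(res); set_next(res, NULL); _sz--; }
    return res;
  }
  size_t sz() const { return _sz; }
};

// One concrete flavour: the link lives in ToyNode::next_special.
class SpecialList : public IntrusiveList {
protected:
  ToyNode* get_next(ToyNode* n)              { return n->next_special; }
  void     set_next(ToyNode* n, ToyNode* nx) { n->next_special = nx; }
};

int main() {
  ToyNode a, b;
  SpecialList l;
  l.insert_before_head(&a);
  l.insert_before_head(&b);
  assert(l.pop() == &b);
  assert(l.pop() == &a);
  assert(l.sz() == 0);
  return 0;
}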
+ EmptyNonHRegionList() : RegionList() {} + +public: + void insert_before_head(HeapRegion* r) { + // assert(r->is_empty(), "Better be empty"); + assert(!r->isHumongous(), "Better not be humongous."); + RegionList::insert_before_head(r); + } + void prepend_list(EmptyNonHRegionList* new_list) { + // assert(new_list->hd() == NULL || new_list->hd()->is_empty(), + // "Better be empty"); + assert(new_list->hd() == NULL || !new_list->hd()->isHumongous(), + "Better not be humongous."); + // assert(new_list->tl() == NULL || new_list->tl()->is_empty(), + // "Better be empty"); + assert(new_list->tl() == NULL || !new_list->tl()->isHumongous(), + "Better not be humongous."); + RegionList::prepend_list(new_list); + } +}; + +class UncleanRegionList: public EmptyNonHRegionList { +public: + HeapRegion* get_next(HeapRegion* hr) { + return hr->next_from_unclean_list(); + } + void set_next(HeapRegion* hr, HeapRegion* new_next) { + hr->set_next_on_unclean_list(new_next); + } + + UncleanRegionList() : EmptyNonHRegionList() {} + + void insert_before_head(HeapRegion* r) { + assert(!r->is_on_free_list(), + "Better not already be on free list"); + assert(!r->is_on_unclean_list(), + "Better not already be on unclean list"); + r->set_zero_fill_needed(); + r->set_on_unclean_list(true); + EmptyNonHRegionList::insert_before_head(r); + } + void prepend_list(UncleanRegionList* new_list) { + assert(new_list->tl() == NULL || !new_list->tl()->is_on_free_list(), + "Better not already be on free list"); + assert(new_list->tl() == NULL || new_list->tl()->is_on_unclean_list(), + "Better already be marked as on unclean list"); + assert(new_list->hd() == NULL || !new_list->hd()->is_on_free_list(), + "Better not already be on free list"); + assert(new_list->hd() == NULL || new_list->hd()->is_on_unclean_list(), + "Better already be marked as on unclean list"); + EmptyNonHRegionList::prepend_list(new_list); + } + HeapRegion* pop() { + HeapRegion* res = RegionList::pop(); + if (res != NULL) res->set_on_unclean_list(false); + return res; + } +}; + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** + +#endif // SERIALGC diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/heapRegion.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,60 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +inline HeapWord* G1OffsetTableContigSpace::allocate(size_t size) { + HeapWord* res = ContiguousSpace::allocate(size); + if (res != NULL) { + _offsets.alloc_block(res, size); + } + return res; +} + +// Because of the requirement of keeping "_offsets" up to date with the +// allocations, we sequentialize these with a lock. Therefore, best if +// this is used for larger LAB allocations only. +inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t size) { + MutexLocker x(&_par_alloc_lock); + // This ought to be just "allocate", because of the lock above, but that + // ContiguousSpace::allocate asserts that either the allocating thread + // holds the heap lock or it is the VM thread and we're at a safepoint. + // The best I (dld) could figure was to put a field in ContiguousSpace + // meaning "locking at safepoint taken care of", and set/reset that + // here. But this will do for now, especially in light of the comment + // above. Perhaps in the future some lock-free manner of keeping the + // coordination. + HeapWord* res = ContiguousSpace::par_allocate(size); + if (res != NULL) { + _offsets.alloc_block(res, size); + } + return res; +} + +inline HeapWord* G1OffsetTableContigSpace::block_start(const void* p) { + return _offsets.block_start(p); +} + +inline HeapWord* +G1OffsetTableContigSpace::block_start_const(const void* p) const { + return _offsets.block_start_const(p); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,1443 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_heapRegionRemSet.cpp.incl" + +#define HRRS_VERBOSE 0 + +#define PRT_COUNT_OCCUPIED 1 + +// OtherRegionsTable + +class PerRegionTable: public CHeapObj { + friend class OtherRegionsTable; + friend class HeapRegionRemSetIterator; + + HeapRegion* _hr; + BitMap _bm; +#if PRT_COUNT_OCCUPIED + jint _occupied; +#endif + PerRegionTable* _next_free; + + PerRegionTable* next_free() { return _next_free; } + void set_next_free(PerRegionTable* prt) { _next_free = prt; } + + + static PerRegionTable* _free_list; + +#ifdef _MSC_VER + // For some reason even though the classes are marked as friend they are unable + // to access CardsPerRegion when private/protected. 
Only the windows c++ compiler + // says this Sun CC and linux gcc don't have a problem with access when private + + public: + +#endif // _MSC_VER + + enum SomePrivateConstants { + CardsPerRegion = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift + }; + +protected: + // We need access in order to union things into the base table. + BitMap* bm() { return &_bm; } + + void recount_occupied() { + _occupied = (jint) bm()->count_one_bits(); + } + + PerRegionTable(HeapRegion* hr) : + _hr(hr), +#if PRT_COUNT_OCCUPIED + _occupied(0), +#endif + _bm(CardsPerRegion, false /* in-resource-area */) + {} + + static void free(PerRegionTable* prt) { + while (true) { + PerRegionTable* fl = _free_list; + prt->set_next_free(fl); + PerRegionTable* res = + (PerRegionTable*) + Atomic::cmpxchg_ptr(prt, &_free_list, fl); + if (res == fl) return; + } + ShouldNotReachHere(); + } + + static PerRegionTable* alloc(HeapRegion* hr) { + PerRegionTable* fl = _free_list; + while (fl != NULL) { + PerRegionTable* nxt = fl->next_free(); + PerRegionTable* res = + (PerRegionTable*) + Atomic::cmpxchg_ptr(nxt, &_free_list, fl); + if (res == fl) { + fl->init(hr); + return fl; + } else { + fl = _free_list; + } + } + assert(fl == NULL, "Loop condition."); + return new PerRegionTable(hr); + } + + void add_card_work(short from_card, bool par) { + if (!_bm.at(from_card)) { + if (par) { + if (_bm.par_at_put(from_card, 1)) { +#if PRT_COUNT_OCCUPIED + Atomic::inc(&_occupied); +#endif + } + } else { + _bm.at_put(from_card, 1); +#if PRT_COUNT_OCCUPIED + _occupied++; +#endif + } + } + } + + void add_reference_work(oop* from, bool par) { + // Must make this robust in case "from" is not in "_hr", because of + // concurrency. + +#if HRRS_VERBOSE + gclog_or_tty->print_cr(" PRT::Add_reference_work(" PTR_FORMAT "->" PTR_FORMAT").", + from, *from); +#endif + + HeapRegion* loc_hr = hr(); + // If the test below fails, then this table was reused concurrently + // with this operation. This is OK, since the old table was coarsened, + // and adding a bit to the new table is never incorrect. + if (loc_hr->is_in_reserved(from)) { + size_t hw_offset = pointer_delta((HeapWord*)from, loc_hr->bottom()); + size_t from_card = + hw_offset >> + (CardTableModRefBS::card_shift - LogHeapWordSize); + + add_card_work((short) from_card, par); + } + } + +public: + + HeapRegion* hr() const { return _hr; } + +#if PRT_COUNT_OCCUPIED + jint occupied() const { + // Overkill, but if we ever need it... + // guarantee(_occupied == _bm.count_one_bits(), "Check"); + return _occupied; + } +#else + jint occupied() const { + return _bm.count_one_bits(); + } +#endif + + void init(HeapRegion* hr) { + _hr = hr; +#if PRT_COUNT_OCCUPIED + _occupied = 0; +#endif + _bm.clear(); + } + + void add_reference(oop* from) { + add_reference_work(from, /*parallel*/ true); + } + + void seq_add_reference(oop* from) { + add_reference_work(from, /*parallel*/ false); + } + + void scrub(CardTableModRefBS* ctbs, BitMap* card_bm) { + HeapWord* hr_bot = hr()->bottom(); + int hr_first_card_index = ctbs->index_for(hr_bot); + bm()->set_intersection_at_offset(*card_bm, hr_first_card_index); +#if PRT_COUNT_OCCUPIED + recount_occupied(); +#endif + } + + void add_card(short from_card_index) { + add_card_work(from_card_index, /*parallel*/ true); + } + + void seq_add_card(short from_card_index) { + add_card_work(from_card_index, /*parallel*/ false); + } + + // (Destructively) union the bitmap of the current table into the given + // bitmap (which is assumed to be of the same size.) 
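PerRegionTable::alloc and PerRegionTable::free above recycle tables through a lock-free LIFO free list: free pushes a table with a compare-and-swap on the list head, and alloc tries to pop before falling back to a fresh allocation. The following is only a rough standalone sketch of that pattern, written with std::atomic rather than HotSpot's Atomic::cmpxchg_ptr; it also glosses over the ABA subtleties that any compare-and-swap free list raises (the comments in heapRegionRemSet.hpp below explain when tables are actually reclaimed in the real collector):

#include <atomic>
#include <cassert>

// Simplified stand-in for PerRegionTable; only the free-list link matters here.
struct Node {
  Node* next_free;
  Node() : next_free(nullptr) {}
};

static std::atomic<Node*> free_list(nullptr);

// Push, as in PerRegionTable::free: retry until the head we linked to is
// still the head when we swing it to our node.
void free_node(Node* n) {
  Node* head = free_list.load(std::memory_order_relaxed);
  do {
    n->next_free = head;
  } while (!free_list.compare_exchange_weak(head, n,
                                            std::memory_order_release,
                                            std::memory_order_relaxed));
}

// Pop, as in PerRegionTable::alloc: claim the head if we can, otherwise fall
// back to a fresh allocation.
Node* alloc_node() {
  Node* head = free_list.load(std::memory_order_acquire);
  while (head != nullptr) {
    Node* next = head->next_free;
    if (free_list.compare_exchange_weak(head, next,
                                        std::memory_order_acquire,
                                        std::memory_order_acquire)) {
      return head;                           // unlinked it; the caller re-inits it
    }
    // head has been reloaded by compare_exchange_weak; try again
  }
  return new Node();                         // free list empty
}

int main() {
  Node a;
  free_node(&a);
  Node* got = alloc_node();                  // reuses the pushed node
  assert(got == &a);
  Node* fresh = alloc_node();                // list empty: allocates a new one
  assert(fresh != &a);
  delete fresh;
  return 0;
}

The union_bitmap_into helper introduced by the comment above follows next; it is what par_contract later uses to fold per-thread tables back into the base table.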
+ void union_bitmap_into(BitMap* bm) { + bm->set_union(_bm); + } + + // Mem size in bytes. + size_t mem_size() const { + return sizeof(this) + _bm.size_in_words() * HeapWordSize; + } + + static size_t fl_mem_size() { + PerRegionTable* cur = _free_list; + size_t res = 0; + while (cur != NULL) { + res += sizeof(PerRegionTable); + cur = cur->next_free(); + } + return res; + } + + // Requires "from" to be in "hr()". + bool contains_reference(oop* from) const { + assert(hr()->is_in_reserved(from), "Precondition."); + size_t card_ind = pointer_delta(from, hr()->bottom(), + CardTableModRefBS::card_size); + return _bm.at(card_ind); + } +}; + +PerRegionTable* PerRegionTable::_free_list = NULL; + + +#define COUNT_PAR_EXPANDS 0 + +#if COUNT_PAR_EXPANDS +static jint n_par_expands = 0; +static jint n_par_contracts = 0; +static jint par_expand_list_len = 0; +static jint max_par_expand_list_len = 0; + +static void print_par_expand() { + Atomic::inc(&n_par_expands); + Atomic::inc(&par_expand_list_len); + if (par_expand_list_len > max_par_expand_list_len) { + max_par_expand_list_len = par_expand_list_len; + } + if ((n_par_expands % 10) == 0) { + gclog_or_tty->print_cr("\n\n%d par expands: %d contracts, " + "len = %d, max_len = %d\n.", + n_par_expands, n_par_contracts, par_expand_list_len, + max_par_expand_list_len); + } +} +#endif + +class PosParPRT: public PerRegionTable { + PerRegionTable** _par_tables; + + enum SomePrivateConstants { + ReserveParTableExpansion = 1 + }; + + void par_expand() { + int n = HeapRegionRemSet::num_par_rem_sets()-1; + if (n <= 0) return; + if (_par_tables == NULL) { + PerRegionTable* res = + (PerRegionTable*) + Atomic::cmpxchg_ptr((PerRegionTable*)ReserveParTableExpansion, + &_par_tables, NULL); + if (res != NULL) return; + // Otherwise, we reserved the right to do the expansion. + + PerRegionTable** ptables = NEW_C_HEAP_ARRAY(PerRegionTable*, n); + for (int i = 0; i < n; i++) { + PerRegionTable* ptable = PerRegionTable::alloc(hr()); + ptables[i] = ptable; + } + // Here we do not need an atomic. + _par_tables = ptables; +#if COUNT_PAR_EXPANDS + print_par_expand(); +#endif + // We must put this table on the expanded list. + PosParPRT* exp_head = _par_expanded_list; + while (true) { + set_next_par_expanded(exp_head); + PosParPRT* res = + (PosParPRT*) + Atomic::cmpxchg_ptr(this, &_par_expanded_list, exp_head); + if (res == exp_head) return; + // Otherwise. + exp_head = res; + } + ShouldNotReachHere(); + } + } + + void par_contract() { + assert(_par_tables != NULL, "Precondition."); + int n = HeapRegionRemSet::num_par_rem_sets()-1; + for (int i = 0; i < n; i++) { + _par_tables[i]->union_bitmap_into(bm()); + PerRegionTable::free(_par_tables[i]); + _par_tables[i] = NULL; + } +#if PRT_COUNT_OCCUPIED + // We must recount the "occupied." 
+ recount_occupied(); +#endif + FREE_C_HEAP_ARRAY(PerRegionTable*, _par_tables); + _par_tables = NULL; +#if COUNT_PAR_EXPANDS + Atomic::inc(&n_par_contracts); + Atomic::dec(&par_expand_list_len); +#endif + } + + static PerRegionTable** _par_table_fl; + + PosParPRT* _next; + + static PosParPRT* _free_list; + + PerRegionTable** par_tables() const { + assert(uintptr_t(NULL) == 0, "Assumption."); + if (uintptr_t(_par_tables) <= ReserveParTableExpansion) + return NULL; + else + return _par_tables; + } + + PosParPRT* _next_par_expanded; + PosParPRT* next_par_expanded() { return _next_par_expanded; } + void set_next_par_expanded(PosParPRT* ppprt) { _next_par_expanded = ppprt; } + static PosParPRT* _par_expanded_list; + +public: + + PosParPRT(HeapRegion* hr) : PerRegionTable(hr), _par_tables(NULL) {} + + jint occupied() const { + jint res = PerRegionTable::occupied(); + if (par_tables() != NULL) { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) { + res += par_tables()[i]->occupied(); + } + } + return res; + } + + void init(HeapRegion* hr) { + PerRegionTable::init(hr); + _next = NULL; + if (par_tables() != NULL) { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) { + par_tables()[i]->init(hr); + } + } + } + + static void free(PosParPRT* prt) { + while (true) { + PosParPRT* fl = _free_list; + prt->set_next(fl); + PosParPRT* res = + (PosParPRT*) + Atomic::cmpxchg_ptr(prt, &_free_list, fl); + if (res == fl) return; + } + ShouldNotReachHere(); + } + + static PosParPRT* alloc(HeapRegion* hr) { + PosParPRT* fl = _free_list; + while (fl != NULL) { + PosParPRT* nxt = fl->next(); + PosParPRT* res = + (PosParPRT*) + Atomic::cmpxchg_ptr(nxt, &_free_list, fl); + if (res == fl) { + fl->init(hr); + return fl; + } else { + fl = _free_list; + } + } + assert(fl == NULL, "Loop condition."); + return new PosParPRT(hr); + } + + PosParPRT* next() const { return _next; } + void set_next(PosParPRT* nxt) { _next = nxt; } + PosParPRT** next_addr() { return &_next; } + + void add_reference(oop* from, int tid) { + // Expand if necessary. + PerRegionTable** pt = par_tables(); + if (par_tables() == NULL && tid > 0 && hr()->is_gc_alloc_region()) { + par_expand(); + pt = par_tables(); + } + if (pt != NULL) { + // We always have to assume that mods to table 0 are in parallel, + // because of the claiming scheme in parallel expansion. A thread + // with tid != 0 that finds the table to be NULL, but doesn't succeed + // in claiming the right of expanding it, will end up in the else + // clause of the above if test. That thread could be delayed, and a + // thread 0 add reference could see the table expanded, and come + // here. Both threads would be adding in parallel. But we get to + // not use atomics for tids > 0. + if (tid == 0) { + PerRegionTable::add_reference(from); + } else { + pt[tid-1]->seq_add_reference(from); + } + } else { + // Not expanded -- add to the base table. 
+ PerRegionTable::add_reference(from); + } + } + + void scrub(CardTableModRefBS* ctbs, BitMap* card_bm) { + assert(_par_tables == NULL, "Precondition"); + PerRegionTable::scrub(ctbs, card_bm); + } + + size_t mem_size() const { + size_t res = + PerRegionTable::mem_size() + sizeof(this) - sizeof(PerRegionTable); + if (_par_tables != NULL) { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) { + res += _par_tables[i]->mem_size(); + } + } + return res; + } + + static size_t fl_mem_size() { + PosParPRT* cur = _free_list; + size_t res = 0; + while (cur != NULL) { + res += sizeof(PosParPRT); + cur = cur->next(); + } + return res; + } + + bool contains_reference(oop* from) const { + if (PerRegionTable::contains_reference(from)) return true; + if (_par_tables != NULL) { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) { + if (_par_tables[i]->contains_reference(from)) return true; + } + } + return false; + } + + static void par_contract_all(); + +}; + +void PosParPRT::par_contract_all() { + PosParPRT* hd = _par_expanded_list; + while (hd != NULL) { + PosParPRT* nxt = hd->next_par_expanded(); + PosParPRT* res = + (PosParPRT*) + Atomic::cmpxchg_ptr(nxt, &_par_expanded_list, hd); + if (res == hd) { + // We claimed the right to contract this table. + hd->set_next_par_expanded(NULL); + hd->par_contract(); + hd = _par_expanded_list; + } else { + hd = res; + } + } +} + +PosParPRT* PosParPRT::_free_list = NULL; +PosParPRT* PosParPRT::_par_expanded_list = NULL; + +jint OtherRegionsTable::_cache_probes = 0; +jint OtherRegionsTable::_cache_hits = 0; + +size_t OtherRegionsTable::_max_fine_entries = 0; +size_t OtherRegionsTable::_mod_max_fine_entries_mask = 0; +#if SAMPLE_FOR_EVICTION +size_t OtherRegionsTable::_fine_eviction_stride = 0; +size_t OtherRegionsTable::_fine_eviction_sample_size = 0; +#endif + +OtherRegionsTable::OtherRegionsTable(HeapRegion* hr) : + _g1h(G1CollectedHeap::heap()), + _m(Mutex::leaf, "An OtherRegionsTable lock", true), + _hr(hr), + _coarse_map(G1CollectedHeap::heap()->max_regions(), + false /* in-resource-area */), + _fine_grain_regions(NULL), + _n_fine_entries(0), _n_coarse_entries(0), +#if SAMPLE_FOR_EVICTION + _fine_eviction_start(0), +#endif + _sparse_table(hr) +{ + typedef PosParPRT* PosParPRTPtr; + if (_max_fine_entries == 0) { + assert(_mod_max_fine_entries_mask == 0, "Both or none."); + _max_fine_entries = (1 << G1LogRSRegionEntries); + _mod_max_fine_entries_mask = _max_fine_entries - 1; +#if SAMPLE_FOR_EVICTION + assert(_fine_eviction_sample_size == 0 + && _fine_eviction_stride == 0, "All init at same time."); + _fine_eviction_sample_size = MAX2((size_t)4, (size_t)G1LogRSRegionEntries); + _fine_eviction_stride = _max_fine_entries / _fine_eviction_sample_size; +#endif + } + _fine_grain_regions = new PosParPRTPtr[_max_fine_entries]; + if (_fine_grain_regions == NULL) + vm_exit_out_of_memory(sizeof(void*)*_max_fine_entries, + "Failed to allocate _fine_grain_entries."); + for (size_t i = 0; i < _max_fine_entries; i++) { + _fine_grain_regions[i] = NULL; + } +} + +int** OtherRegionsTable::_from_card_cache = NULL; +size_t OtherRegionsTable::_from_card_cache_max_regions = 0; +size_t OtherRegionsTable::_from_card_cache_mem_size = 0; + +void OtherRegionsTable::init_from_card_cache(size_t max_regions) { + _from_card_cache_max_regions = max_regions; + + int n_par_rs = HeapRegionRemSet::num_par_rem_sets(); + _from_card_cache = NEW_C_HEAP_ARRAY(int*, n_par_rs); + for (int i = 0; i < n_par_rs; i++) { + _from_card_cache[i] = NEW_C_HEAP_ARRAY(int, 
max_regions); + for (size_t j = 0; j < max_regions; j++) { + _from_card_cache[i][j] = -1; // An invalid value. + } + } + _from_card_cache_mem_size = n_par_rs * max_regions * sizeof(int); +} + +void OtherRegionsTable::shrink_from_card_cache(size_t new_n_regs) { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { + assert(new_n_regs <= _from_card_cache_max_regions, "Must be within max."); + for (size_t j = new_n_regs; j < _from_card_cache_max_regions; j++) { + _from_card_cache[i][j] = -1; // An invalid value. + } + } +} + +#ifndef PRODUCT +void OtherRegionsTable::print_from_card_cache() { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { + for (size_t j = 0; j < _from_card_cache_max_regions; j++) { + gclog_or_tty->print_cr("_from_card_cache[%d][%d] = %d.", + i, j, _from_card_cache[i][j]); + } + } +} +#endif + +void OtherRegionsTable::add_reference(oop* from, int tid) { + size_t cur_hrs_ind = hr()->hrs_index(); + +#if HRRS_VERBOSE + gclog_or_tty->print_cr("ORT::add_reference_work(" PTR_FORMAT "->" PTR_FORMAT ").", + from, *from); +#endif + + int from_card = (int)(uintptr_t(from) >> CardTableModRefBS::card_shift); + +#if HRRS_VERBOSE + gclog_or_tty->print_cr("Table for [" PTR_FORMAT "...): card %d (cache = %d)", + hr()->bottom(), from_card, + _from_card_cache[tid][cur_hrs_ind]); +#endif + +#define COUNT_CACHE 0 +#if COUNT_CACHE + jint p = Atomic::add(1, &_cache_probes); + if ((p % 10000) == 0) { + jint hits = _cache_hits; + gclog_or_tty->print_cr("%d/%d = %5.2f%% RS cache hits.", + _cache_hits, p, 100.0* (float)hits/(float)p); + } +#endif + if (from_card == _from_card_cache[tid][cur_hrs_ind]) { +#if HRRS_VERBOSE + gclog_or_tty->print_cr(" from-card cache hit."); +#endif +#if COUNT_CACHE + Atomic::inc(&_cache_hits); +#endif + assert(contains_reference(from), "We just added it!"); + return; + } else { + _from_card_cache[tid][cur_hrs_ind] = from_card; + } + + // Note that this may be a continued H region. + HeapRegion* from_hr = _g1h->heap_region_containing_raw(from); + size_t from_hrs_ind = (size_t)from_hr->hrs_index(); + + // If the region is already coarsened, return. + if (_coarse_map.at(from_hrs_ind)) { +#if HRRS_VERBOSE + gclog_or_tty->print_cr(" coarse map hit."); +#endif + assert(contains_reference(from), "We just added it!"); + return; + } + + // Otherwise find a per-region table to add it to. + size_t ind = from_hrs_ind & _mod_max_fine_entries_mask; + PosParPRT* prt = find_region_table(ind, from_hr); + if (prt == NULL) { + MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); + // Confirm that it's really not there... + prt = find_region_table(ind, from_hr); + if (prt == NULL) { + + uintptr_t from_hr_bot_card_index = + uintptr_t(from_hr->bottom()) + >> CardTableModRefBS::card_shift; + int card_index = from_card - from_hr_bot_card_index; + assert(0 <= card_index && card_index < PosParPRT::CardsPerRegion, + "Must be in range."); + if (G1HRRSUseSparseTable && + _sparse_table.add_card((short) from_hrs_ind, card_index)) { + if (G1RecordHRRSOops) { + HeapRegionRemSet::record(hr(), from); +#if HRRS_VERBOSE + gclog_or_tty->print(" Added card " PTR_FORMAT " to region " + "[" PTR_FORMAT "...) 
for ref " PTR_FORMAT ".\n", + align_size_down(uintptr_t(from), + CardTableModRefBS::card_size), + hr()->bottom(), from); +#endif + } +#if HRRS_VERBOSE + gclog_or_tty->print_cr(" added card to sparse table."); +#endif + assert(contains_reference_locked(from), "We just added it!"); + return; + } else { +#if HRRS_VERBOSE + gclog_or_tty->print_cr(" [tid %d] sparse table entry " + "overflow(f: %d, t: %d)", + tid, from_hrs_ind, cur_hrs_ind); +#endif + } + + // Otherwise, transfer from sparse to fine-grain. + short cards[SparsePRTEntry::CardsPerEntry]; + if (G1HRRSUseSparseTable) { + bool res = _sparse_table.get_cards((short) from_hrs_ind, &cards[0]); + assert(res, "There should have been an entry"); + } + + if (_n_fine_entries == _max_fine_entries) { + prt = delete_region_table(); + } else { + prt = PosParPRT::alloc(from_hr); + } + prt->init(from_hr); + // Record the outgoing pointer in the from_region's outgoing bitmap. + from_hr->rem_set()->add_outgoing_reference(hr()); + + PosParPRT* first_prt = _fine_grain_regions[ind]; + prt->set_next(first_prt); // XXX Maybe move to init? + _fine_grain_regions[ind] = prt; + _n_fine_entries++; + + // Add in the cards from the sparse table. + if (G1HRRSUseSparseTable) { + for (int i = 0; i < SparsePRTEntry::CardsPerEntry; i++) { + short c = cards[i]; + if (c != SparsePRTEntry::NullEntry) { + prt->add_card(c); + } + } + // Now we can delete the sparse entry. + bool res = _sparse_table.delete_entry((short) from_hrs_ind); + assert(res, "It should have been there."); + } + } + assert(prt != NULL && prt->hr() == from_hr, "consequence"); + } + // Note that we can't assert "prt->hr() == from_hr", because of the + // possibility of concurrent reuse. But see head comment of + // OtherRegionsTable for why this is OK. + assert(prt != NULL, "Inv"); + + prt->add_reference(from, tid); + if (G1RecordHRRSOops) { + HeapRegionRemSet::record(hr(), from); +#if HRRS_VERBOSE + gclog_or_tty->print("Added card " PTR_FORMAT " to region " + "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n", + align_size_down(uintptr_t(from), + CardTableModRefBS::card_size), + hr()->bottom(), from); +#endif + } + assert(contains_reference(from), "We just added it!"); +} + +PosParPRT* +OtherRegionsTable::find_region_table(size_t ind, HeapRegion* hr) const { + assert(0 <= ind && ind < _max_fine_entries, "Preconditions."); + PosParPRT* prt = _fine_grain_regions[ind]; + while (prt != NULL && prt->hr() != hr) { + prt = prt->next(); + } + // Loop postcondition is the method postcondition. + return prt; +} + + +#define DRT_CENSUS 0 + +#if DRT_CENSUS +static const int HistoSize = 6; +static int global_histo[HistoSize] = { 0, 0, 0, 0, 0, 0 }; +static int coarsenings = 0; +static int occ_sum = 0; +#endif + +jint OtherRegionsTable::_n_coarsenings = 0; + +PosParPRT* OtherRegionsTable::delete_region_table() { +#if DRT_CENSUS + int histo[HistoSize] = { 0, 0, 0, 0, 0, 0 }; + const int histo_limits[] = { 1, 4, 16, 64, 256, 2048 }; +#endif + + assert(_m.owned_by_self(), "Precondition"); + assert(_n_fine_entries == _max_fine_entries, "Precondition"); + PosParPRT* max = NULL; + jint max_occ = 0; + PosParPRT** max_prev; + size_t max_ind; + +#if SAMPLE_FOR_EVICTION + size_t i = _fine_eviction_start; + for (size_t k = 0; k < _fine_eviction_sample_size; k++) { + size_t ii = i; + // Make sure we get a non-NULL sample. 
+ while (_fine_grain_regions[ii] == NULL) { + ii++; + if (ii == _max_fine_entries) ii = 0; + guarantee(ii != i, "We must find one."); + } + PosParPRT** prev = &_fine_grain_regions[ii]; + PosParPRT* cur = *prev; + while (cur != NULL) { + jint cur_occ = cur->occupied(); + if (max == NULL || cur_occ > max_occ) { + max = cur; + max_prev = prev; + max_ind = i; + max_occ = cur_occ; + } + prev = cur->next_addr(); + cur = cur->next(); + } + i = i + _fine_eviction_stride; + if (i >= _n_fine_entries) i = i - _n_fine_entries; + } + _fine_eviction_start++; + if (_fine_eviction_start >= _n_fine_entries) + _fine_eviction_start -= _n_fine_entries; +#else + for (int i = 0; i < _max_fine_entries; i++) { + PosParPRT** prev = &_fine_grain_regions[i]; + PosParPRT* cur = *prev; + while (cur != NULL) { + jint cur_occ = cur->occupied(); +#if DRT_CENSUS + for (int k = 0; k < HistoSize; k++) { + if (cur_occ <= histo_limits[k]) { + histo[k]++; global_histo[k]++; break; + } + } +#endif + if (max == NULL || cur_occ > max_occ) { + max = cur; + max_prev = prev; + max_ind = i; + max_occ = cur_occ; + } + prev = cur->next_addr(); + cur = cur->next(); + } + } +#endif + // XXX + guarantee(max != NULL, "Since _n_fine_entries > 0"); +#if DRT_CENSUS + gclog_or_tty->print_cr("In a coarsening: histo of occs:"); + for (int k = 0; k < HistoSize; k++) { + gclog_or_tty->print_cr(" <= %4d: %5d.", histo_limits[k], histo[k]); + } + coarsenings++; + occ_sum += max_occ; + if ((coarsenings % 100) == 0) { + gclog_or_tty->print_cr("\ncoarsenings = %d; global summary:", coarsenings); + for (int k = 0; k < HistoSize; k++) { + gclog_or_tty->print_cr(" <= %4d: %5d.", histo_limits[k], global_histo[k]); + } + gclog_or_tty->print_cr("Avg occ of deleted region = %6.2f.", + (float)occ_sum/(float)coarsenings); + } +#endif + + // Set the corresponding coarse bit. + int max_hrs_index = max->hr()->hrs_index(); + if (!_coarse_map.at(max_hrs_index)) { + _coarse_map.at_put(max_hrs_index, true); + _n_coarse_entries++; +#if 0 + gclog_or_tty->print("Coarsened entry in region [" PTR_FORMAT "...] " + "for region [" PTR_FORMAT "...] (%d coarse entries).\n", + hr()->bottom(), + max->hr()->bottom(), + _n_coarse_entries); +#endif + } + + // Unsplice. + *max_prev = max->next(); + Atomic::inc(&_n_coarsenings); + _n_fine_entries--; + return max; +} + + +// At present, this must be called stop-world single-threaded. +void OtherRegionsTable::scrub(CardTableModRefBS* ctbs, + BitMap* region_bm, BitMap* card_bm) { + // First eliminated garbage regions from the coarse map. + if (G1RSScrubVerbose) + gclog_or_tty->print_cr("Scrubbing region %d:", hr()->hrs_index()); + + assert(_coarse_map.size() == region_bm->size(), "Precondition"); + if (G1RSScrubVerbose) + gclog_or_tty->print(" Coarse map: before = %d...", _n_coarse_entries); + _coarse_map.set_intersection(*region_bm); + _n_coarse_entries = _coarse_map.count_one_bits(); + if (G1RSScrubVerbose) + gclog_or_tty->print_cr(" after = %d.", _n_coarse_entries); + + // Now do the fine-grained maps. + for (size_t i = 0; i < _max_fine_entries; i++) { + PosParPRT* cur = _fine_grain_regions[i]; + PosParPRT** prev = &_fine_grain_regions[i]; + while (cur != NULL) { + PosParPRT* nxt = cur->next(); + // If the entire region is dead, eliminate. 
+ if (G1RSScrubVerbose) + gclog_or_tty->print_cr(" For other region %d:", cur->hr()->hrs_index()); + if (!region_bm->at(cur->hr()->hrs_index())) { + *prev = nxt; + cur->set_next(NULL); + _n_fine_entries--; + if (G1RSScrubVerbose) + gclog_or_tty->print_cr(" deleted via region map."); + PosParPRT::free(cur); + } else { + // Do fine-grain elimination. + if (G1RSScrubVerbose) + gclog_or_tty->print(" occ: before = %4d.", cur->occupied()); + cur->scrub(ctbs, card_bm); + if (G1RSScrubVerbose) + gclog_or_tty->print_cr(" after = %4d.", cur->occupied()); + // Did that empty the table completely? + if (cur->occupied() == 0) { + *prev = nxt; + cur->set_next(NULL); + _n_fine_entries--; + PosParPRT::free(cur); + } else { + prev = cur->next_addr(); + } + } + cur = nxt; + } + } + // Since we may have deleted a from_card_cache entry from the RS, clear + // the FCC. + clear_fcc(); +} + + +size_t OtherRegionsTable::occupied() const { + // Cast away const in this case. + MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag); + size_t sum = occ_fine(); + sum += occ_sparse(); + sum += occ_coarse(); + return sum; +} + +size_t OtherRegionsTable::occ_fine() const { + size_t sum = 0; + for (size_t i = 0; i < _max_fine_entries; i++) { + PosParPRT* cur = _fine_grain_regions[i]; + while (cur != NULL) { + sum += cur->occupied(); + cur = cur->next(); + } + } + return sum; +} + +size_t OtherRegionsTable::occ_coarse() const { + return (_n_coarse_entries * PosParPRT::CardsPerRegion); +} + +size_t OtherRegionsTable::occ_sparse() const { + return _sparse_table.occupied(); +} + +size_t OtherRegionsTable::mem_size() const { + // Cast away const in this case. + MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag); + size_t sum = 0; + for (size_t i = 0; i < _max_fine_entries; i++) { + PosParPRT* cur = _fine_grain_regions[i]; + while (cur != NULL) { + sum += cur->mem_size(); + cur = cur->next(); + } + } + sum += (sizeof(PosParPRT*) * _max_fine_entries); + sum += (_coarse_map.size_in_words() * HeapWordSize); + sum += (_sparse_table.mem_size()); + sum += sizeof(*this) - sizeof(_sparse_table); // Avoid double counting above. + return sum; +} + +size_t OtherRegionsTable::static_mem_size() { + return _from_card_cache_mem_size; +} + +size_t OtherRegionsTable::fl_mem_size() { + return PerRegionTable::fl_mem_size() + PosParPRT::fl_mem_size(); +} + +void OtherRegionsTable::clear_fcc() { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { + _from_card_cache[i][hr()->hrs_index()] = -1; + } +} + +void OtherRegionsTable::clear() { + MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); + for (size_t i = 0; i < _max_fine_entries; i++) { + PosParPRT* cur = _fine_grain_regions[i]; + while (cur != NULL) { + PosParPRT* nxt = cur->next(); + PosParPRT::free(cur); + cur = nxt; + } + _fine_grain_regions[i] = NULL; + } + _sparse_table.clear(); + _coarse_map.clear(); + _n_fine_entries = 0; + _n_coarse_entries = 0; + + clear_fcc(); +} + +void OtherRegionsTable::clear_incoming_entry(HeapRegion* from_hr) { + MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); + size_t hrs_ind = (size_t)from_hr->hrs_index(); + size_t ind = hrs_ind & _mod_max_fine_entries_mask; + if (del_single_region_table(ind, from_hr)) { + assert(!_coarse_map.at(hrs_ind), "Inv"); + } else { + _coarse_map.par_at_put(hrs_ind, 0); + } + // Check to see if any of the fcc entries come from here. 
+ int hr_ind = hr()->hrs_index(); + for (int tid = 0; tid < HeapRegionRemSet::num_par_rem_sets(); tid++) { + int fcc_ent = _from_card_cache[tid][hr_ind]; + if (fcc_ent != -1) { + HeapWord* card_addr = (HeapWord*) + (uintptr_t(fcc_ent) << CardTableModRefBS::card_shift); + if (hr()->is_in_reserved(card_addr)) { + // Clear the from card cache. + _from_card_cache[tid][hr_ind] = -1; + } + } + } +} + +bool OtherRegionsTable::del_single_region_table(size_t ind, + HeapRegion* hr) { + assert(0 <= ind && ind < _max_fine_entries, "Preconditions."); + PosParPRT** prev_addr = &_fine_grain_regions[ind]; + PosParPRT* prt = *prev_addr; + while (prt != NULL && prt->hr() != hr) { + prev_addr = prt->next_addr(); + prt = prt->next(); + } + if (prt != NULL) { + assert(prt->hr() == hr, "Loop postcondition."); + *prev_addr = prt->next(); + PosParPRT::free(prt); + _n_fine_entries--; + return true; + } else { + return false; + } +} + +bool OtherRegionsTable::contains_reference(oop* from) const { + // Cast away const in this case. + MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag); + return contains_reference_locked(from); +} + +bool OtherRegionsTable::contains_reference_locked(oop* from) const { + HeapRegion* hr = _g1h->heap_region_containing_raw(from); + if (hr == NULL) return false; + size_t hr_ind = hr->hrs_index(); + // Is this region in the coarse map? + if (_coarse_map.at(hr_ind)) return true; + + PosParPRT* prt = find_region_table(hr_ind & _mod_max_fine_entries_mask, + hr); + if (prt != NULL) { + return prt->contains_reference(from); + + } else { + uintptr_t from_card = + (uintptr_t(from) >> CardTableModRefBS::card_shift); + uintptr_t hr_bot_card_index = + uintptr_t(hr->bottom()) >> CardTableModRefBS::card_shift; + assert(from_card >= hr_bot_card_index, "Inv"); + int card_index = from_card - hr_bot_card_index; + return _sparse_table.contains_card((short)hr_ind, card_index); + } + + +} + + +bool HeapRegionRemSet::_par_traversal = false; + +void HeapRegionRemSet::set_par_traversal(bool b) { + assert(_par_traversal != b, "Proper alternation..."); + _par_traversal = b; +} + +int HeapRegionRemSet::num_par_rem_sets() { + // We always have at least two, so that a mutator thread can claim an + // id and add to a rem set. 
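The from-card cache being consulted and cleared above is a simple duplicate filter: for each (thread id, owning region) pair it remembers the last card whose reference was recorded, so repeated stores into the same card can skip the remembered set machinery entirely. A small standalone sketch of the idea, assuming 512-byte cards (the constant, class and field names here are illustrative, not HotSpot's):

#include <cstdint>
#include <vector>

// Assumed card geometry: 512-byte cards, i.e. a shift of 9.
static const int kCardShift = 9;

// Convert an interior pointer to a global card index.
inline intptr_t card_index_for(const void* p) {
  return reinterpret_cast<intptr_t>(p) >> kCardShift;
}

// Per (thread, region) cache of the last card recorded; -1 means "none".
struct FromCardCache {
  std::vector<std::vector<intptr_t> > last_card;

  FromCardCache(int threads, int regions)
    : last_card(threads, std::vector<intptr_t>(regions, -1)) {}

  // Returns true if the reference still needs to go into the remembered set,
  // false if it lands on the same card as the previous add from this thread.
  bool should_record(int tid, int region, const void* from) {
    intptr_t card = card_index_for(from);
    if (last_card[tid][region] == card) return false;  // duplicate, filtered
    last_card[tid][region] = card;
    return true;
  }
};

int main() {
  alignas(512) static int words[16];
  FromCardCache fcc(2 /* threads */, 8 /* regions */);
  bool first  = fcc.should_record(0, 3, &words[0]);  // new card: must record
  bool second = fcc.should_record(0, 3, &words[1]);  // same card: filtered out
  return (first && !second) ? 0 : 1;
}

Because mutator threads as well as GC worker threads need such a per-thread slot, num_par_rem_sets() below never returns fewer than two ids.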
+ return (int) MAX2(ParallelGCThreads, (size_t)2); +} + +HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, + HeapRegion* hr) + : _bosa(bosa), _other_regions(hr), + _outgoing_region_map(G1CollectedHeap::heap()->max_regions(), + false /* in-resource-area */), + _iter_state(Unclaimed) +{} + + +void HeapRegionRemSet::init_for_par_iteration() { + _iter_state = Unclaimed; +} + +bool HeapRegionRemSet::claim_iter() { + if (_iter_state != Unclaimed) return false; + jint res = Atomic::cmpxchg(Claimed, (jint*)(&_iter_state), Unclaimed); + return (res == Unclaimed); +} + +void HeapRegionRemSet::set_iter_complete() { + _iter_state = Complete; +} + +bool HeapRegionRemSet::iter_is_complete() { + return _iter_state == Complete; +} + + +void HeapRegionRemSet::init_iterator(HeapRegionRemSetIterator* iter) const { + iter->initialize(this); +} + +#ifndef PRODUCT +void HeapRegionRemSet::print() const { + HeapRegionRemSetIterator iter; + init_iterator(&iter); + size_t card_index; + while (iter.has_next(card_index)) { + HeapWord* card_start = + G1CollectedHeap::heap()->bot_shared()->address_for_index(card_index); + gclog_or_tty->print_cr(" Card " PTR_FORMAT ".", card_start); + } + // XXX + if (iter.n_yielded() != occupied()) { + gclog_or_tty->print_cr("Yielded disagrees with occupied:"); + gclog_or_tty->print_cr(" %6d yielded (%6d coarse, %6d fine).", + iter.n_yielded(), + iter.n_yielded_coarse(), iter.n_yielded_fine()); + gclog_or_tty->print_cr(" %6d occ (%6d coarse, %6d fine).", + occupied(), occ_coarse(), occ_fine()); + } + guarantee(iter.n_yielded() == occupied(), + "We should have yielded all the represented cards."); +} +#endif + +void HeapRegionRemSet::cleanup() { + SparsePRT::cleanup_all(); +} + +void HeapRegionRemSet::par_cleanup() { + PosParPRT::par_contract_all(); +} + +void HeapRegionRemSet::add_outgoing_reference(HeapRegion* to_hr) { + _outgoing_region_map.par_at_put(to_hr->hrs_index(), 1); +} + +void HeapRegionRemSet::clear() { + clear_outgoing_entries(); + _outgoing_region_map.clear(); + _other_regions.clear(); + assert(occupied() == 0, "Should be clear."); +} + +void HeapRegionRemSet::clear_outgoing_entries() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + size_t i = _outgoing_region_map.get_next_one_offset(0); + while (i < _outgoing_region_map.size()) { + HeapRegion* to_region = g1h->region_at(i); + to_region->rem_set()->clear_incoming_entry(hr()); + i = _outgoing_region_map.get_next_one_offset(i+1); + } +} + + +void HeapRegionRemSet::scrub(CardTableModRefBS* ctbs, + BitMap* region_bm, BitMap* card_bm) { + _other_regions.scrub(ctbs, region_bm, card_bm); +} + +//-------------------- Iteration -------------------- + +HeapRegionRemSetIterator:: +HeapRegionRemSetIterator() : + _hrrs(NULL), + _g1h(G1CollectedHeap::heap()), + _bosa(NULL), + _sparse_iter(size_t(G1CollectedHeap::heap()->reserved_region().start()) + >> CardTableModRefBS::card_shift) +{} + +void HeapRegionRemSetIterator::initialize(const HeapRegionRemSet* hrrs) { + _hrrs = hrrs; + _coarse_map = &_hrrs->_other_regions._coarse_map; + _fine_grain_regions = _hrrs->_other_regions._fine_grain_regions; + _bosa = _hrrs->bosa(); + + _is = Sparse; + // Set these values so that we increment to the first region. 
+ _coarse_cur_region_index = -1; + _coarse_cur_region_cur_card = (PosParPRT::CardsPerRegion-1);; + + _cur_region_cur_card = 0; + + _fine_array_index = -1; + _fine_cur_prt = NULL; + + _n_yielded_coarse = 0; + _n_yielded_fine = 0; + _n_yielded_sparse = 0; + + _sparse_iter.init(&hrrs->_other_regions._sparse_table); +} + +bool HeapRegionRemSetIterator::coarse_has_next(size_t& card_index) { + if (_hrrs->_other_regions._n_coarse_entries == 0) return false; + // Go to the next card. + _coarse_cur_region_cur_card++; + // Was the last the last card in the current region? + if (_coarse_cur_region_cur_card == PosParPRT::CardsPerRegion) { + // Yes: find the next region. This may leave _coarse_cur_region_index + // Set to the last index, in which case there are no more coarse + // regions. + _coarse_cur_region_index = + (int) _coarse_map->get_next_one_offset(_coarse_cur_region_index + 1); + if ((size_t)_coarse_cur_region_index < _coarse_map->size()) { + _coarse_cur_region_cur_card = 0; + HeapWord* r_bot = + _g1h->region_at(_coarse_cur_region_index)->bottom(); + _cur_region_card_offset = _bosa->index_for(r_bot); + } else { + return false; + } + } + // If we didn't return false above, then we can yield a card. + card_index = _cur_region_card_offset + _coarse_cur_region_cur_card; + return true; +} + +void HeapRegionRemSetIterator::fine_find_next_non_null_prt() { + // Otherwise, find the next bucket list in the array. + _fine_array_index++; + while (_fine_array_index < (int) OtherRegionsTable::_max_fine_entries) { + _fine_cur_prt = _fine_grain_regions[_fine_array_index]; + if (_fine_cur_prt != NULL) return; + else _fine_array_index++; + } + assert(_fine_cur_prt == NULL, "Loop post"); +} + +bool HeapRegionRemSetIterator::fine_has_next(size_t& card_index) { + if (fine_has_next()) { + _cur_region_cur_card = + _fine_cur_prt->_bm.get_next_one_offset(_cur_region_cur_card + 1); + } + while (!fine_has_next()) { + if (_cur_region_cur_card == PosParPRT::CardsPerRegion) { + _cur_region_cur_card = 0; + _fine_cur_prt = _fine_cur_prt->next(); + } + if (_fine_cur_prt == NULL) { + fine_find_next_non_null_prt(); + if (_fine_cur_prt == NULL) return false; + } + assert(_fine_cur_prt != NULL && _cur_region_cur_card == 0, + "inv."); + HeapWord* r_bot = + _fine_cur_prt->hr()->bottom(); + _cur_region_card_offset = _bosa->index_for(r_bot); + _cur_region_cur_card = _fine_cur_prt->_bm.get_next_one_offset(0); + } + assert(fine_has_next(), "Or else we exited the loop via the return."); + card_index = _cur_region_card_offset + _cur_region_cur_card; + return true; +} + +bool HeapRegionRemSetIterator::fine_has_next() { + return + _fine_cur_prt != NULL && + _cur_region_cur_card < PosParPRT::CardsPerRegion; +} + +bool HeapRegionRemSetIterator::has_next(size_t& card_index) { + switch (_is) { + case Sparse: + if (_sparse_iter.has_next(card_index)) { + _n_yielded_sparse++; + return true; + } + // Otherwise, deliberate fall-through + _is = Fine; + case Fine: + if (fine_has_next(card_index)) { + _n_yielded_fine++; + return true; + } + // Otherwise, deliberate fall-through + _is = Coarse; + case Coarse: + if (coarse_has_next(card_index)) { + _n_yielded_coarse++; + return true; + } + // Otherwise... 
+ break; + } + assert(ParallelGCThreads > 1 || + n_yielded() == _hrrs->occupied(), + "Should have yielded all the cards in the rem set " + "(in the non-par case)."); + return false; +} + + + +oop** HeapRegionRemSet::_recorded_oops = NULL; +HeapWord** HeapRegionRemSet::_recorded_cards = NULL; +HeapRegion** HeapRegionRemSet::_recorded_regions = NULL; +int HeapRegionRemSet::_n_recorded = 0; + +HeapRegionRemSet::Event* HeapRegionRemSet::_recorded_events = NULL; +int* HeapRegionRemSet::_recorded_event_index = NULL; +int HeapRegionRemSet::_n_recorded_events = 0; + +void HeapRegionRemSet::record(HeapRegion* hr, oop* f) { + if (_recorded_oops == NULL) { + assert(_n_recorded == 0 + && _recorded_cards == NULL + && _recorded_regions == NULL, + "Inv"); + _recorded_oops = NEW_C_HEAP_ARRAY(oop*, MaxRecorded); + _recorded_cards = NEW_C_HEAP_ARRAY(HeapWord*, MaxRecorded); + _recorded_regions = NEW_C_HEAP_ARRAY(HeapRegion*, MaxRecorded); + } + if (_n_recorded == MaxRecorded) { + gclog_or_tty->print_cr("Filled up 'recorded' (%d).", MaxRecorded); + } else { + _recorded_cards[_n_recorded] = + (HeapWord*)align_size_down(uintptr_t(f), + CardTableModRefBS::card_size); + _recorded_oops[_n_recorded] = f; + _recorded_regions[_n_recorded] = hr; + _n_recorded++; + } +} + +void HeapRegionRemSet::record_event(Event evnt) { + if (!G1RecordHRRSEvents) return; + + if (_recorded_events == NULL) { + assert(_n_recorded_events == 0 + && _recorded_event_index == NULL, + "Inv"); + _recorded_events = NEW_C_HEAP_ARRAY(Event, MaxRecordedEvents); + _recorded_event_index = NEW_C_HEAP_ARRAY(int, MaxRecordedEvents); + } + if (_n_recorded_events == MaxRecordedEvents) { + gclog_or_tty->print_cr("Filled up 'recorded_events' (%d).", MaxRecordedEvents); + } else { + _recorded_events[_n_recorded_events] = evnt; + _recorded_event_index[_n_recorded_events] = _n_recorded; + _n_recorded_events++; + } +} + +void HeapRegionRemSet::print_event(outputStream* str, Event evnt) { + switch (evnt) { + case Event_EvacStart: + str->print("Evac Start"); + break; + case Event_EvacEnd: + str->print("Evac End"); + break; + case Event_RSUpdateEnd: + str->print("RS Update End"); + break; + } +} + +void HeapRegionRemSet::print_recorded() { + int cur_evnt = 0; + Event cur_evnt_kind; + int cur_evnt_ind = 0; + if (_n_recorded_events > 0) { + cur_evnt_kind = _recorded_events[cur_evnt]; + cur_evnt_ind = _recorded_event_index[cur_evnt]; + } + + for (int i = 0; i < _n_recorded; i++) { + while (cur_evnt < _n_recorded_events && i == cur_evnt_ind) { + gclog_or_tty->print("Event: "); + print_event(gclog_or_tty, cur_evnt_kind); + gclog_or_tty->print_cr(""); + cur_evnt++; + if (cur_evnt < MaxRecordedEvents) { + cur_evnt_kind = _recorded_events[cur_evnt]; + cur_evnt_ind = _recorded_event_index[cur_evnt]; + } + } + gclog_or_tty->print("Added card " PTR_FORMAT " to region [" PTR_FORMAT "...]" + " for ref " PTR_FORMAT ".\n", + _recorded_cards[i], _recorded_regions[i]->bottom(), + _recorded_oops[i]); + } +} + +#ifndef PRODUCT +void HeapRegionRemSet::test() { + os::sleep(Thread::current(), (jlong)5000, false); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // Run with "-XX:G1LogRSRegionEntries=2", so that 1 and 5 end up in same + // hash bucket. 
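The collision mentioned in that comment is plain power-of-two masking: with -XX:G1LogRSRegionEntries=2 the fine-grain table has 1 << 2 = 4 buckets and a region index is reduced by ANDing with 3, so regions 1 and 5 land in the same bucket. A tiny sketch of the computation (the flag value is the test's assumption; the variable names are illustrative):

#include <cstdio>
#include <cstddef>

int main() {
  const size_t log_rs_region_entries = 2;                    // -XX:G1LogRSRegionEntries=2
  const size_t max_fine_entries      = (size_t)1 << log_rs_region_entries;
  const size_t mask                  = max_fine_entries - 1; // like _mod_max_fine_entries_mask
  const size_t region_indices[] = { 1, 5 };
  for (size_t r : region_indices) {
    printf("region %zu -> fine-grain bucket %zu\n", r, r & mask);  // both map to bucket 1
  }
  return 0;
}

The test body that follows picks exactly those regions, plus enough extra referencing regions to force a coarsening.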
+ HeapRegion* hr0 = g1h->region_at(0); + HeapRegion* hr1 = g1h->region_at(1); + HeapRegion* hr2 = g1h->region_at(5); + HeapRegion* hr3 = g1h->region_at(6); + HeapRegion* hr4 = g1h->region_at(7); + HeapRegion* hr5 = g1h->region_at(8); + + HeapWord* hr1_start = hr1->bottom(); + HeapWord* hr1_mid = hr1_start + HeapRegion::GrainWords/2; + HeapWord* hr1_last = hr1->end() - 1; + + HeapWord* hr2_start = hr2->bottom(); + HeapWord* hr2_mid = hr2_start + HeapRegion::GrainWords/2; + HeapWord* hr2_last = hr2->end() - 1; + + HeapWord* hr3_start = hr3->bottom(); + HeapWord* hr3_mid = hr3_start + HeapRegion::GrainWords/2; + HeapWord* hr3_last = hr3->end() - 1; + + HeapRegionRemSet* hrrs = hr0->rem_set(); + + // Make three references from region 0x101... + hrrs->add_reference((oop*)hr1_start); + hrrs->add_reference((oop*)hr1_mid); + hrrs->add_reference((oop*)hr1_last); + + hrrs->add_reference((oop*)hr2_start); + hrrs->add_reference((oop*)hr2_mid); + hrrs->add_reference((oop*)hr2_last); + + hrrs->add_reference((oop*)hr3_start); + hrrs->add_reference((oop*)hr3_mid); + hrrs->add_reference((oop*)hr3_last); + + // Now cause a coarsening. + hrrs->add_reference((oop*)hr4->bottom()); + hrrs->add_reference((oop*)hr5->bottom()); + + // Now, does iteration yield these three? + HeapRegionRemSetIterator iter; + hrrs->init_iterator(&iter); + size_t sum = 0; + size_t card_index; + while (iter.has_next(card_index)) { + HeapWord* card_start = + G1CollectedHeap::heap()->bot_shared()->address_for_index(card_index); + gclog_or_tty->print_cr(" Card " PTR_FORMAT ".", card_start); + sum++; + } + guarantee(sum == 11 - 3 + 2048, "Failure"); + guarantee(sum == hrrs->occupied(), "Failure"); +} +#endif diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,470 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Remembered set for a heap region. Represent a set of "cards" that +// contain pointers into the owner heap region. Cards are defined somewhat +// abstractly, in terms of what the "BlockOffsetTable" in use can parse. 
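Concretely, the classes declared below keep the incoming cards for a region at three granularities: a sparse table holding a handful of explicit card indices per referencing region, a fine-grain bitmap with one bit per card of the referencing region, and, when the fine-grain table overflows, a single coarse bit that stands for the whole referencing region. The following is a much simplified, single-threaded sketch of that escalation for one referencing region (fixed toy sizes, no hashing or locking, none of the real classes):

#include <bitset>
#include <cassert>
#include <cstddef>

// Toy sizes; the real values derive from the region size and the card size.
static const size_t kCardsPerRegion = 64;
static const size_t kSparseCapacity = 4;

// Tracks which cards of ONE referencing region hold pointers into "our" region.
class ToyPerRegionCardSet {
  size_t _sparse[kSparseCapacity];      // level 1: a few explicit card indices
  size_t _n_sparse;
  bool   _fine_in_use;                  // level 2: one bit per card
  std::bitset<kCardsPerRegion> _fine;
  bool   _coarse;                       // level 3: "the whole region may point at us"

public:
  ToyPerRegionCardSet() : _n_sparse(0), _fine_in_use(false), _coarse(false) {}

  void add_card(size_t card) {
    if (_coarse) return;                // already maximally imprecise
    if (!_fine_in_use) {
      for (size_t i = 0; i < _n_sparse; i++) {
        if (_sparse[i] == card) return; // duplicate
      }
      if (_n_sparse < kSparseCapacity) {
        _sparse[_n_sparse++] = card;
        return;
      }
      // Sparse overflow: promote the existing entries into the bitmap.
      for (size_t i = 0; i < _n_sparse; i++) {
        _fine.set(_sparse[i]);
      }
      _fine_in_use = true;
    }
    _fine.set(card);
  }

  // In the real code coarsening happens when the table of fine-grain entries
  // overflows and this entry is evicted; here it is just exposed directly.
  void coarsen() { _coarse = true; _fine.reset(); _fine_in_use = false; }

  bool contains_card(size_t card) const {
    if (_coarse) return true;
    if (_fine_in_use) return _fine.test(card);
    for (size_t i = 0; i < _n_sparse; i++) {
      if (_sparse[i] == card) return true;
    }
    return false;
  }
};

int main() {
  ToyPerRegionCardSet rs;
  for (size_t c = 0; c < 6; c++) rs.add_card(c);   // forces sparse -> fine
  assert(rs.contains_card(5) && !rs.contains_card(42));
  rs.coarsen();
  assert(rs.contains_card(42));                    // coarse answers "maybe" for everything
  return 0;
}

In the real OtherRegionsTable the per-region bitmaps live in an open hash table keyed by the referencing region's index, and coarsening is forced by evicting the most heavily occupied entry when that table fills up.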
+ +class G1CollectedHeap; +class G1BlockOffsetSharedArray; +class HeapRegion; +class HeapRegionRemSetIterator; +class PosParPRT; +class SparsePRT; + + +// The "_coarse_map" is a bitmap with one bit for each region, where set +// bits indicate that the corresponding region may contain some pointer +// into the owning region. + +// The "_fine_grain_entries" array is an open hash table of PerRegionTables +// (PRTs), indicating regions for which we're keeping the RS as a set of +// cards. The strategy is to cap the size of the fine-grain table, +// deleting an entry and setting the corresponding coarse-grained bit when +// we would overflow this cap. + +// We use a mixture of locking and lock-free techniques here. We allow +// threads to locate PRTs without locking, but threads attempting to alter +// a bucket list obtain a lock. This means that any failing attempt to +// find a PRT must be retried with the lock. It might seem dangerous that +// a read can find a PRT that is concurrently deleted. This is all right, +// because: +// +// 1) We only actually free PRT's at safe points (though we reuse them at +// other times). +// 2) We find PRT's in an attempt to add entries. If a PRT is deleted, +// it's _coarse_map bit is set, so the that we were attempting to add +// is represented. If a deleted PRT is re-used, a thread adding a bit, +// thinking the PRT is for a different region, does no harm. + +class OtherRegionsTable: public CHeapObj { + friend class HeapRegionRemSetIterator; + + G1CollectedHeap* _g1h; + Mutex _m; + HeapRegion* _hr; + + // These are protected by "_m". + BitMap _coarse_map; + size_t _n_coarse_entries; + static jint _n_coarsenings; + + PosParPRT** _fine_grain_regions; + size_t _n_fine_entries; + +#define SAMPLE_FOR_EVICTION 1 +#if SAMPLE_FOR_EVICTION + size_t _fine_eviction_start; + static size_t _fine_eviction_stride; + static size_t _fine_eviction_sample_size; +#endif + + SparsePRT _sparse_table; + + // These are static after init. + static size_t _max_fine_entries; + static size_t _mod_max_fine_entries_mask; + + // Requires "prt" to be the first element of the bucket list appropriate + // for "hr". If this list contains an entry for "hr", return it, + // otherwise return "NULL". + PosParPRT* find_region_table(size_t ind, HeapRegion* hr) const; + + // Find, delete, and return a candidate PosParPRT, if any exists, + // adding the deleted region to the coarse bitmap. Requires the caller + // to hold _m, and the fine-grain table to be full. + PosParPRT* delete_region_table(); + + // If a PRT for "hr" is in the bucket list indicated by "ind" (which must + // be the correct index for "hr"), delete it and return true; else return + // false. + bool del_single_region_table(size_t ind, HeapRegion* hr); + + static jint _cache_probes; + static jint _cache_hits; + + // Indexed by thread X heap region, to minimize thread contention. + static int** _from_card_cache; + static size_t _from_card_cache_max_regions; + static size_t _from_card_cache_mem_size; + +public: + OtherRegionsTable(HeapRegion* hr); + + HeapRegion* hr() const { return _hr; } + + // For now. Could "expand" some tables in the future, so that this made + // sense. + void add_reference(oop* from, int tid); + + void add_reference(oop* from) { + return add_reference(from, 0); + } + + // Removes any entries shown by the given bitmaps to contain only dead + // objects. + void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm); + + // Not const because it takes a lock. 
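The discipline described in the comment above, where readers may walk the _fine_grain_regions bucket lists without the lock but a thread that fails to find an entry must repeat the search under _m before inserting, reduces to roughly the following sketch in standard C++ (one bucket only, std::mutex and std::atomic standing in for the HotSpot primitives):

#include <atomic>
#include <cassert>
#include <mutex>

struct Entry {
  int                 key;
  std::atomic<Entry*> next;
  explicit Entry(int k) : key(k), next(nullptr) {}
};

static std::atomic<Entry*> bucket_head(nullptr);
static std::mutex          bucket_mutex;

// Lock-free lookup; safe only because entries are never reclaimed while
// readers may still be walking the list (the safepoint argument above).
static Entry* find(int key) {
  for (Entry* e = bucket_head.load(std::memory_order_acquire);
       e != nullptr;
       e = e->next.load(std::memory_order_acquire)) {
    if (e->key == key) return e;
  }
  return nullptr;
}

// A failed lock-free lookup must be repeated under the lock, because another
// thread may have inserted the entry in the meantime.
static Entry* find_or_insert(int key) {
  Entry* e = find(key);
  if (e != nullptr) return e;
  std::lock_guard<std::mutex> guard(bucket_mutex);
  e = find(key);                              // re-check while holding the lock
  if (e != nullptr) return e;
  Entry* n = new Entry(key);
  n->next.store(bucket_head.load(std::memory_order_relaxed),
                std::memory_order_relaxed);
  bucket_head.store(n, std::memory_order_release);
  return n;
}

int main() {
  Entry* a = find_or_insert(7);
  Entry* b = find_or_insert(7);               // second lookup finds the same entry
  assert(a == b);
  return 0;
}

The accessors declared next (occupied(), mem_size() and the rest) take that same lock internally, which is what the preceding comment is pointing out even though they are declared const.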
+ size_t occupied() const; + size_t occ_fine() const; + size_t occ_coarse() const; + size_t occ_sparse() const; + + static jint n_coarsenings() { return _n_coarsenings; } + + // Returns size in bytes. + // Not const because it takes a lock. + size_t mem_size() const; + static size_t static_mem_size(); + static size_t fl_mem_size(); + + bool contains_reference(oop* from) const; + bool contains_reference_locked(oop* from) const; + + void clear(); + + // Specifically clear the from_card_cache. + void clear_fcc(); + + // "from_hr" is being cleared; remove any entries from it. + void clear_incoming_entry(HeapRegion* from_hr); + + // Declare the heap size (in # of regions) to the OtherRegionsTable. + // (Uses it to initialize from_card_cache). + static void init_from_card_cache(size_t max_regions); + + // Declares that only regions i s.t. 0 <= i < new_n_regs are in use. + // Make sure any entries for higher regions are invalid. + static void shrink_from_card_cache(size_t new_n_regs); + + static void print_from_card_cache(); + +}; + + +class HeapRegionRemSet : public CHeapObj { + friend class VMStructs; + friend class HeapRegionRemSetIterator; + +public: + enum Event { + Event_EvacStart, Event_EvacEnd, Event_RSUpdateEnd + }; + +private: + G1BlockOffsetSharedArray* _bosa; + G1BlockOffsetSharedArray* bosa() const { return _bosa; } + + static bool _par_traversal; + + OtherRegionsTable _other_regions; + + // One set bit for every region that has an entry for this one. + BitMap _outgoing_region_map; + + // Clear entries for the current region in any rem sets named in + // the _outgoing_region_map. + void clear_outgoing_entries(); + +#if MAYBE + // Audit the given card index. + void audit_card(size_t card_num, HeapRegion* hr, u2* rc_arr, + HeapRegionRemSet* empty_cards, size_t* one_obj_cards); + + // Assumes that "audit_stage1" has been called for "hr", to set up + // "shadow" and "new_rs" appropriately. Identifies individual popular + // objects; returns "true" if any are found. + bool audit_find_pop(HeapRegion* hr, u2* rc_arr); + + // Assumes that "audit_stage1" has been called for "hr", to set up + // "shadow" and "new_rs" appropriately. Identifies individual popular + // objects, and determines the number of entries in "new_rs" if any such + // popular objects are ignored. If this is sufficiently small, returns + // "false" to indicate that a constraint should not be introduced. + // Otherwise, returns "true" to indicate that we should go ahead with + // adding the constraint. + bool audit_stag(HeapRegion* hr, u2* rc_arr); + + + u2* alloc_rc_array(); + + SeqHeapRegionRemSet* audit_post(u2* rc_arr, size_t multi_obj_crds, + SeqHeapRegionRemSet* empty_cards); +#endif + + enum ParIterState { Unclaimed, Claimed, Complete }; + ParIterState _iter_state; + + // Unused unless G1RecordHRRSOops is true. 
+ + static const int MaxRecorded = 1000000; + static oop** _recorded_oops; + static HeapWord** _recorded_cards; + static HeapRegion** _recorded_regions; + static int _n_recorded; + + static const int MaxRecordedEvents = 1000; + static Event* _recorded_events; + static int* _recorded_event_index; + static int _n_recorded_events; + + static void print_event(outputStream* str, Event evnt); + +public: + HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, + HeapRegion* hr); + + static int num_par_rem_sets(); + static bool par_traversal() { return _par_traversal; } + static void set_par_traversal(bool b); + + HeapRegion* hr() const { + return _other_regions.hr(); + } + + size_t occupied() const { + return _other_regions.occupied(); + } + size_t occ_fine() const { + return _other_regions.occ_fine(); + } + size_t occ_coarse() const { + return _other_regions.occ_coarse(); + } + size_t occ_sparse() const { + return _other_regions.occ_sparse(); + } + + static jint n_coarsenings() { return OtherRegionsTable::n_coarsenings(); } + + /* Used in the sequential case. Returns "true" iff this addition causes + the size limit to be reached. */ + bool add_reference(oop* from) { + _other_regions.add_reference(from); + return false; + } + + /* Used in the parallel case. Returns "true" iff this addition causes + the size limit to be reached. */ + bool add_reference(oop* from, int tid) { + _other_regions.add_reference(from, tid); + return false; + } + + // Records the fact that the current region contains an outgoing + // reference into "to_hr". + void add_outgoing_reference(HeapRegion* to_hr); + + // Removes any entries shown by the given bitmaps to contain only dead + // objects. + void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm); + + // The region is being reclaimed; clear its remset, and any mention of + // entries for this region in other remsets. + void clear(); + + // Forget any entries due to pointers from "from_hr". + void clear_incoming_entry(HeapRegion* from_hr) { + _other_regions.clear_incoming_entry(from_hr); + } + +#if 0 + virtual void cleanup() = 0; +#endif + + // Should be called from single-threaded code. + void init_for_par_iteration(); + // Attempt to claim the region. Returns true iff this call caused an + // atomic transition from Unclaimed to Claimed. + bool claim_iter(); + // Sets the iteration state to "complete". + void set_iter_complete(); + // Returns "true" iff the region's iteration is complete. + bool iter_is_complete(); + + // Initialize the given iterator to iterate over this rem set. + void init_iterator(HeapRegionRemSetIterator* iter) const; + +#if 0 + // Apply the "do_card" method to the start address of every card in the + // rem set. Returns false if some application of the closure aborted. + virtual bool card_iterate(CardClosure* iter) = 0; +#endif + + // The actual # of bytes this hr_remset takes up. + size_t mem_size() { + return _other_regions.mem_size() + // This correction is necessary because the above includes the second + // part. + + sizeof(this) - sizeof(OtherRegionsTable); + } + + // Returns the memory occupancy of all static data structures associated + // with remembered sets. + static size_t static_mem_size() { + return OtherRegionsTable::static_mem_size(); + } + + // Returns the memory occupancy of all free_list data structures associated + // with remembered sets. 
+ static size_t fl_mem_size() { + return OtherRegionsTable::fl_mem_size(); + } + + bool contains_reference(oop* from) const { + return _other_regions.contains_reference(from); + } + void print() const; + +#if MAYBE + // We are about to introduce a constraint, requiring the collection time + // of the region owning this RS to be <= "hr", and forgetting pointers + // from the owning region to "hr." Before doing so, examines this rem + // set for pointers to "hr", possibly identifying some popular objects., + // and possibly finding some cards to no longer contain pointers to "hr", + // + // These steps may prevent the the constraint from being necessary; in + // which case returns a set of cards now thought to contain no pointers + // into HR. In the normal (I assume) case, returns NULL, indicating that + // we should go ahead and add the constraint. + virtual SeqHeapRegionRemSet* audit(HeapRegion* hr) = 0; +#endif + + // Called during a stop-world phase to perform any deferred cleanups. + // The second version may be called by parallel threads after then finish + // collection work. + static void cleanup(); + static void par_cleanup(); + + // Declare the heap size (in # of regions) to the HeapRegionRemSet(s). + // (Uses it to initialize from_card_cache). + static void init_heap(size_t max_regions) { + OtherRegionsTable::init_from_card_cache(max_regions); + } + + // Declares that only regions i s.t. 0 <= i < new_n_regs are in use. + static void shrink_heap(size_t new_n_regs) { + OtherRegionsTable::shrink_from_card_cache(new_n_regs); + } + +#ifndef PRODUCT + static void print_from_card_cache() { + OtherRegionsTable::print_from_card_cache(); + } +#endif + + static void record(HeapRegion* hr, oop* f); + static void print_recorded(); + static void record_event(Event evnt); + + // Run unit tests. +#ifndef PRODUCT + static void test(); +#endif + +}; + +class HeapRegionRemSetIterator : public CHeapObj { + + // The region over which we're iterating. + const HeapRegionRemSet* _hrrs; + + // Local caching of HRRS fields. + const BitMap* _coarse_map; + PosParPRT** _fine_grain_regions; + + G1BlockOffsetSharedArray* _bosa; + G1CollectedHeap* _g1h; + + // The number yielded since initialization. + size_t _n_yielded_fine; + size_t _n_yielded_coarse; + size_t _n_yielded_sparse; + + // If true we're iterating over the coarse table; if false the fine + // table. + enum IterState { + Sparse, + Fine, + Coarse + }; + IterState _is; + + // In both kinds of iteration, heap offset of first card of current + // region. + size_t _cur_region_card_offset; + // Card offset within cur region. + size_t _cur_region_cur_card; + + // Coarse table iteration fields: + + // Current region index; + int _coarse_cur_region_index; + int _coarse_cur_region_cur_card; + + bool coarse_has_next(size_t& card_index); + + // Fine table iteration fields: + + // Index of bucket-list we're working on. + int _fine_array_index; + // Per Region Table we're doing within current bucket list. + PosParPRT* _fine_cur_prt; + + /* SparsePRT::*/ SparsePRTIter _sparse_iter; + + void fine_find_next_non_null_prt(); + + bool fine_has_next(); + bool fine_has_next(size_t& card_index); + +public: + // We require an iterator to be initialized before use, so the + // constructor does little. + HeapRegionRemSetIterator(); + + void initialize(const HeapRegionRemSet* hrrs); + + // If there remains one or more cards to be yielded, returns true and + // sets "card_index" to one of those cards (which is then considered + // yielded.) 
Otherwise, returns false (and leaves "card_index" + // undefined.) + bool has_next(size_t& card_index); + + size_t n_yielded_fine() { return _n_yielded_fine; } + size_t n_yielded_coarse() { return _n_yielded_coarse; } + size_t n_yielded_sparse() { return _n_yielded_sparse; } + size_t n_yielded() { + return n_yielded_fine() + n_yielded_coarse() + n_yielded_sparse(); + } +}; + +#if 0 +class CardClosure: public Closure { +public: + virtual void do_card(HeapWord* card_start) = 0; +}; + +#endif diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/heapRegionSeq.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,344 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_heapRegionSeq.cpp.incl" + +// Local to this file. + +static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) { + if ((*hr1p)->end() <= (*hr2p)->bottom()) return -1; + else if ((*hr2p)->end() <= (*hr1p)->bottom()) return 1; + else if (*hr1p == *hr2p) return 0; + else { + assert(false, "We should never compare distinct overlapping regions."); + } + return 0; +} + +HeapRegionSeq::HeapRegionSeq() : + _alloc_search_start(0), + // The line below is the worst bit of C++ hackery I've ever written + // (Detlefs, 11/23). You should think of it as equivalent to + // "_regions(100, true)": initialize the growable array and inform it + // that it should allocate its elem array(s) on the C heap. The first + // argument, however, is actually a comma expression (new-expr, 100). + // The purpose of the new_expr is to inform the growable array that it + // is *already* allocated on the C heap: it uses the placement syntax to + // keep it from actually doing any allocation. + _regions((ResourceObj::operator new (sizeof(GrowableArray), + (void*)&_regions, + ResourceObj::C_HEAP), + 100), + true), + _next_rr_candidate(0), + _seq_bottom(NULL) +{} + +// Private methods. 
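The constructor above leans on a fairly obscure initializer idiom, so a minimal, standalone sketch may help. The types below (Buf, Holder) are hypothetical stand-ins, not HotSpot classes; in the real constructor the bookkeeping call is ResourceObj::operator new, which records that the GrowableArray's storage is already allocated on the C heap. Only the comma expression in the member initializer is illustrated: its first operand makes the placement-style call on the member's own storage, and its second operand is the argument that the constructor actually receives.

#include <cstddef>
#include <cstdio>

// Hypothetical stand-in types -- not HotSpot classes.
struct Buf {
  size_t _cap;
  // Placement-style operator new: allocates nothing and simply returns the
  // caller-supplied address.  (In HotSpot, ResourceObj::operator new plays
  // this role and additionally records that the storage is on the C heap.)
  static void* operator new(size_t, void* where) { return where; }
  explicit Buf(size_t cap) : _cap(cap) {}
};

struct Holder {
  Buf _buf;
  Holder()
    // The comma expression first makes the bookkeeping "new" call on the
    // member's own storage, then evaluates to 16, which is what the Buf
    // constructor actually receives -- morally just "_buf(16)".
    : _buf((Buf::operator new(sizeof(Buf), (void*)&_buf), 16)) {}
};

int main() {
  Holder h;
  std::printf("capacity = %d\n", (int)h._buf._cap);   // prints 16
  return 0;
}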
+ +HeapWord* +HeapRegionSeq::alloc_obj_from_region_index(int ind, size_t word_size) { + assert(G1CollectedHeap::isHumongous(word_size), + "Allocation size should be humongous"); + int cur = ind; + int first = cur; + size_t sumSizes = 0; + while (cur < _regions.length() && sumSizes < word_size) { + // Loop invariant: + // For all i in [first, cur): + // _regions.at(i)->is_empty() + // && _regions.at(i) is contiguous with its predecessor, if any + // && sumSizes is the sum of the sizes of the regions in the interval + // [first, cur) + HeapRegion* curhr = _regions.at(cur); + if (curhr->is_empty() + && !curhr->is_reserved() + && (first == cur + || (_regions.at(cur-1)->end() == + curhr->bottom()))) { + sumSizes += curhr->capacity() / HeapWordSize; + } else { + first = cur + 1; + sumSizes = 0; + } + cur++; + } + if (sumSizes >= word_size) { + _alloc_search_start = cur; + // Mark the allocated regions as allocated. + bool zf = G1CollectedHeap::heap()->allocs_are_zero_filled(); + HeapRegion* first_hr = _regions.at(first); + for (int i = first; i < cur; i++) { + HeapRegion* hr = _regions.at(i); + if (zf) + hr->ensure_zero_filled(); + { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + hr->set_zero_fill_allocated(); + } + size_t sz = hr->capacity() / HeapWordSize; + HeapWord* tmp = hr->allocate(sz); + assert(tmp != NULL, "Humongous allocation failure"); + MemRegion mr = MemRegion(tmp, sz); + SharedHeap::fill_region_with_object(mr); + hr->declare_filled_region_to_BOT(mr); + if (i == first) { + first_hr->set_startsHumongous(); + } else { + assert(i > first, "sanity"); + hr->set_continuesHumongous(first_hr); + } + } + HeapWord* first_hr_bot = first_hr->bottom(); + HeapWord* obj_end = first_hr_bot + word_size; + first_hr->set_top(obj_end); + return first_hr_bot; + } else { + // If we started from the beginning, we want to know why we can't alloc. + return NULL; + } +} + +void HeapRegionSeq::print_empty_runs(bool reserved_are_empty) { + int empty_run = 0; + int n_empty = 0; + bool at_least_one_reserved = false; + int empty_run_start; + for (int i = 0; i < _regions.length(); i++) { + HeapRegion* r = _regions.at(i); + if (r->continuesHumongous()) continue; + if (r->is_empty() && (reserved_are_empty || !r->is_reserved())) { + assert(!r->isHumongous(), "H regions should not be empty."); + if (empty_run == 0) empty_run_start = i; + empty_run++; + n_empty++; + if (r->is_reserved()) { + at_least_one_reserved = true; + } + } else { + if (empty_run > 0) { + gclog_or_tty->print(" %d:%d", empty_run_start, empty_run); + if (reserved_are_empty && at_least_one_reserved) + gclog_or_tty->print("(R)"); + empty_run = 0; + at_least_one_reserved = false; + } + } + } + if (empty_run > 0) { + gclog_or_tty->print(" %d:%d", empty_run_start, empty_run); + if (reserved_are_empty && at_least_one_reserved) gclog_or_tty->print("(R)"); + } + gclog_or_tty->print_cr(" [tot = %d]", n_empty); +} + +int HeapRegionSeq::find(HeapRegion* hr) { + // FIXME: optimized for adjacent regions of fixed size. + int ind = hr->hrs_index(); + if (ind != -1) { + assert(_regions.at(ind) == hr, "Mismatch"); + } + return ind; +} + + +// Public methods. 
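The allocation loop in alloc_obj_from_region_index() above is easier to see in isolation. The sketch below is a simplified, hypothetical version that assumes all slots have the same capacity and are already address-contiguous; the real code additionally checks that each region's bottom() meets its predecessor's end(), skips reserved regions, and then zero-fills and formats the chosen regions. Only the run-finding scan and its invariant are shown.

#include <cstddef>
#include <cstdio>
#include <vector>

// Returns the index of the first slot of a run of free slots whose combined
// capacity is at least 'words', or -1 if no such run exists.
int find_free_run(const std::vector<bool>& is_free,
                  size_t slot_words, size_t words) {
  size_t sum   = 0;
  int    first = 0;
  for (int cur = 0; cur < (int)is_free.size() && sum < words; cur++) {
    // Invariant: slots [first, cur) are all free and 'sum' is their capacity.
    if (is_free[cur]) {
      sum += slot_words;
    } else {
      first = cur + 1;   // run broken: restart just past the used slot
      sum   = 0;
    }
  }
  return (sum >= words) ? first : -1;
}

int main() {
  bool layout[] = { true, false, true, true, true, false };
  std::vector<bool> is_free(layout, layout + 6);
  // Three free slots of 512 words each cover a 1200-word request.
  std::printf("run starts at slot %d\n", find_free_run(is_free, 512, 1200));
  return 0;
}

As in obj_allocate() above, a caller that fails to find a run starting from its cached search index can simply retry the same scan from index 0.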
+ +void HeapRegionSeq::insert(HeapRegion* hr) { + if (_regions.length() == 0 + || _regions.top()->end() <= hr->bottom()) { + hr->set_hrs_index(_regions.length()); + _regions.append(hr); + } else { + _regions.append(hr); + _regions.sort(orderRegions); + for (int i = 0; i < _regions.length(); i++) { + _regions.at(i)->set_hrs_index(i); + } + } + char* bot = (char*)_regions.at(0)->bottom(); + if (_seq_bottom == NULL || bot < _seq_bottom) _seq_bottom = bot; +} + +size_t HeapRegionSeq::length() { + return _regions.length(); +} + +size_t HeapRegionSeq::free_suffix() { + size_t res = 0; + int first = _regions.length() - 1; + int cur = first; + while (cur >= 0 && + (_regions.at(cur)->is_empty() + && !_regions.at(cur)->is_reserved() + && (first == cur + || (_regions.at(cur+1)->bottom() == + _regions.at(cur)->end())))) { + res++; + cur--; + } + return res; +} + +HeapWord* HeapRegionSeq::obj_allocate(size_t word_size) { + int cur = _alloc_search_start; + // Make sure "cur" is a valid index. + assert(cur >= 0, "Invariant."); + HeapWord* res = alloc_obj_from_region_index(cur, word_size); + if (res == NULL) + res = alloc_obj_from_region_index(0, word_size); + return res; +} + +void HeapRegionSeq::iterate(HeapRegionClosure* blk) { + iterate_from((HeapRegion*)NULL, blk); +} + +// The first argument r is the heap region at which iteration begins. +// This operation runs fastest when r is NULL, or the heap region for +// which a HeapRegionClosure most recently returned true, or the +// heap region immediately to its right in the sequence. In all +// other cases a linear search is required to find the index of r. + +void HeapRegionSeq::iterate_from(HeapRegion* r, HeapRegionClosure* blk) { + + // :::: FIXME :::: + // Static cache value is bad, especially when we start doing parallel + // remembered set update. For now just don't cache anything (the + // code in the def'd out blocks). + +#if 0 + static int cached_j = 0; +#endif + int len = _regions.length(); + int j = 0; + // Find the index of r. 
+ if (r != NULL) { +#if 0 + assert(cached_j >= 0, "Invariant."); + if ((cached_j < len) && (r == _regions.at(cached_j))) { + j = cached_j; + } else if ((cached_j + 1 < len) && (r == _regions.at(cached_j + 1))) { + j = cached_j + 1; + } else { + j = find(r); +#endif + if (j < 0) { + j = 0; + } +#if 0 + } +#endif + } + int i; + for (i = j; i < len; i += 1) { + int res = blk->doHeapRegion(_regions.at(i)); + if (res) { +#if 0 + cached_j = i; +#endif + blk->incomplete(); + return; + } + } + for (i = 0; i < j; i += 1) { + int res = blk->doHeapRegion(_regions.at(i)); + if (res) { +#if 0 + cached_j = i; +#endif + blk->incomplete(); + return; + } + } +} + +void HeapRegionSeq::iterate_from(int idx, HeapRegionClosure* blk) { + int len = _regions.length(); + int i; + for (i = idx; i < len; i++) { + if (blk->doHeapRegion(_regions.at(i))) { + blk->incomplete(); + return; + } + } + for (i = 0; i < idx; i++) { + if (blk->doHeapRegion(_regions.at(i))) { + blk->incomplete(); + return; + } + } +} + +MemRegion HeapRegionSeq::shrink_by(size_t shrink_bytes, + size_t& num_regions_deleted) { + assert(shrink_bytes % os::vm_page_size() == 0, "unaligned"); + assert(shrink_bytes % HeapRegion::GrainBytes == 0, "unaligned"); + + if (_regions.length() == 0) { + num_regions_deleted = 0; + return MemRegion(); + } + int j = _regions.length() - 1; + HeapWord* end = _regions.at(j)->end(); + HeapWord* last_start = end; + while (j >= 0 && shrink_bytes > 0) { + HeapRegion* cur = _regions.at(j); + // We have to leave humongous regions where they are, + // and work around them. + if (cur->isHumongous()) { + return MemRegion(last_start, end); + } + cur->reset_zero_fill(); + assert(cur == _regions.top(), "Should be top"); + if (!cur->is_empty()) break; + shrink_bytes -= cur->capacity(); + num_regions_deleted++; + _regions.pop(); + last_start = cur->bottom(); + // We need to delete these somehow, but can't currently do so here: if + // we do, the ZF thread may still access the deleted region. We'll + // leave this here as a reminder that we have to do something about + // this. + // delete cur; + j--; + } + return MemRegion(last_start, end); +} + + +class PrintHeapRegionClosure : public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion* r) { + gclog_or_tty->print(PTR_FORMAT ":", r); + r->print(); + return false; + } +}; + +void HeapRegionSeq::print() { + PrintHeapRegionClosure cl; + iterate(&cl); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/heapRegionSeq.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/heapRegionSeq.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,111 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class HeapRegion; +class HeapRegionClosure; + +class HeapRegionSeq: public CHeapObj { + + // _regions is kept sorted by start address order, and no two regions are + // overlapping. + GrowableArray _regions; + + // The index in "_regions" at which to start the next allocation search. + // (For efficiency only; private to obj_allocate after initialization.) + int _alloc_search_start; + + // Attempts to allocate a block of the (assumed humongous) word_size, + // starting at the region "ind". + HeapWord* alloc_obj_from_region_index(int ind, size_t word_size); + + // Currently, we're choosing collection sets in a round-robin fashion, + // starting here. + int _next_rr_candidate; + + // The bottom address of the bottom-most region, or else NULL if there + // are no regions in the sequence. + char* _seq_bottom; + + public: + // Initializes "this" to the empty sequence of regions. + HeapRegionSeq(); + + // Adds "hr" to "this" sequence. Requires "hr" not to overlap with + // any region already in "this". (Will perform better if regions are + // inserted in ascending address order.) + void insert(HeapRegion* hr); + + // Given a HeapRegion*, returns its index within _regions, + // or returns -1 if not found. + int find(HeapRegion* hr); + + // Requires the index to be valid, and return the region at the index. + HeapRegion* at(size_t i) { return _regions.at((int)i); } + + // Return the number of regions in the sequence. + size_t length(); + + // Returns the number of contiguous regions at the end of the sequence + // that are available for allocation. + size_t free_suffix(); + + // Requires "word_size" to be humongous (in the technical sense). If + // possible, allocates a contiguous subsequence of the heap regions to + // satisfy the allocation, and returns the address of the beginning of + // that sequence, otherwise returns NULL. + HeapWord* obj_allocate(size_t word_size); + + // Apply the "doHeapRegion" method of "blk" to all regions in "this", + // in address order, terminating the iteration early + // if the "doHeapRegion" method returns "true". + void iterate(HeapRegionClosure* blk); + + // Apply the "doHeapRegion" method of "blk" to all regions in "this", + // starting at "r" (or first region, if "r" is NULL), in a circular + // manner, terminating the iteration early if the "doHeapRegion" method + // returns "true". + void iterate_from(HeapRegion* r, HeapRegionClosure* blk); + + // As above, but start from a given index in the sequence + // instead of a given heap region. + void iterate_from(int idx, HeapRegionClosure* blk); + + // Requires "shrink_bytes" to be a multiple of the page size and heap + // region granularity. Deletes as many "rightmost" completely free heap + // regions from the sequence as comprise shrink_bytes bytes. Returns the + // MemRegion indicating the region those regions comprised, and sets + // "num_regions_deleted" to the number of regions deleted. + MemRegion shrink_by(size_t shrink_bytes, size_t& num_regions_deleted); + + // If "addr" falls within a region in the sequence, return that region, + // or else NULL. + HeapRegion* addr_to_region(const void* addr); + + void print(); + + // Prints out runs of empty regions. If the arg is "true" reserved + // (popular regions are considered "empty". 
+ void print_empty_runs(bool reserved_are_empty); + +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/heapRegionSeq.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/heapRegionSeq.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,40 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +inline HeapRegion* HeapRegionSeq::addr_to_region(const void* addr) { + assert(_seq_bottom != NULL, "bad _seq_bottom in addr_to_region"); + if ((char*) addr >= _seq_bottom) { + size_t diff = (size_t) pointer_delta((HeapWord*) addr, + (HeapWord*) _seq_bottom); + int index = (int) (diff >> HeapRegion::LogOfHRGrainWords); + assert(index >= 0, "invariant / paranoia"); + if (index < _regions.length()) { + HeapRegion* hr = _regions.at(index); + assert(hr->is_in_reserved(addr), + "addr_to_region is wrong..."); + return hr; + } + } + return NULL; +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/ptrQueue.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,208 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_ptrQueue.cpp.incl" + +PtrQueue::PtrQueue(PtrQueueSet* qset_, bool perm) : + _qset(qset_), _buf(NULL), _index(0), _active(false), + _perm(perm), _lock(NULL) +{} + +PtrQueue::~PtrQueue() { + if (!_perm && _buf != NULL) { + if (_index == _sz) { + // No work to do. + qset()->deallocate_buffer(_buf); + } else { + // We must NULL out the unused entries, then enqueue. + for (size_t i = 0; i < _index; i += oopSize) { + _buf[byte_index_to_index((int)i)] = NULL; + } + qset()->enqueue_complete_buffer(_buf); + _buf = NULL; + } + } +} + + +static int byte_index_to_index(int ind) { + assert((ind % oopSize) == 0, "Invariant."); + return ind / oopSize; +} + +static int index_to_byte_index(int byte_ind) { + return byte_ind * oopSize; +} + +void PtrQueue::enqueue_known_active(void* ptr) { + assert(0 <= _index && _index <= _sz, "Invariant."); + assert(_index == 0 || _buf != NULL, "invariant"); + + while (_index == 0) { + handle_zero_index(); + } + assert(_index > 0, "postcondition"); + + _index -= oopSize; + _buf[byte_index_to_index((int)_index)] = ptr; + assert(0 <= _index && _index <= _sz, "Invariant."); +} + +void PtrQueue::locking_enqueue_completed_buffer(void** buf) { + assert(_lock->owned_by_self(), "Required."); + _lock->unlock(); + qset()->enqueue_complete_buffer(buf); + // We must relock only because the caller will unlock, for the normal + // case. + _lock->lock_without_safepoint_check(); +} + + +PtrQueueSet::PtrQueueSet(bool notify_when_complete) : + _max_completed_queue(0), + _cbl_mon(NULL), _fl_lock(NULL), + _notify_when_complete(notify_when_complete), + _sz(0), + _completed_buffers_head(NULL), + _completed_buffers_tail(NULL), + _n_completed_buffers(0), + _process_completed_threshold(0), _process_completed(false), + _buf_free_list(NULL), _buf_free_list_sz(0) +{} + +void** PtrQueueSet::allocate_buffer() { + assert(_sz > 0, "Didn't set a buffer size."); + MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); + if (_buf_free_list != NULL) { + void** res = _buf_free_list; + _buf_free_list = (void**)_buf_free_list[0]; + _buf_free_list_sz--; + // Just override the next pointer with NULL, just in case we scan this part + // of the buffer. + res[0] = NULL; + return res; + } else { + return NEW_C_HEAP_ARRAY(void*, _sz); + } +} + +void PtrQueueSet::deallocate_buffer(void** buf) { + assert(_sz > 0, "Didn't set a buffer size."); + MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); + buf[0] = (void*)_buf_free_list; + _buf_free_list = buf; + _buf_free_list_sz++; +} + +void PtrQueueSet::reduce_free_list() { + // For now we'll adopt the strategy of deleting half. + MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); + size_t n = _buf_free_list_sz / 2; + while (n > 0) { + assert(_buf_free_list != NULL, "_buf_free_list_sz must be wrong."); + void** head = _buf_free_list; + _buf_free_list = (void**)_buf_free_list[0]; + FREE_C_HEAP_ARRAY(void*,head); + n--; + } +} + +void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index, bool ignore_max_completed) { + // I use explicit locking here because there's a bailout in the middle. 
+ _cbl_mon->lock_without_safepoint_check(); + + Thread* thread = Thread::current(); + assert( ignore_max_completed || + thread->is_Java_thread() || + SafepointSynchronize::is_at_safepoint(), + "invariant" ); + ignore_max_completed = ignore_max_completed || !thread->is_Java_thread(); + + if (!ignore_max_completed && _max_completed_queue > 0 && + _n_completed_buffers >= (size_t) _max_completed_queue) { + _cbl_mon->unlock(); + bool b = mut_process_buffer(buf); + if (b) { + deallocate_buffer(buf); + return; + } + + // Otherwise, go ahead and enqueue the buffer. Must reaquire the lock. + _cbl_mon->lock_without_safepoint_check(); + } + + // Here we still hold the _cbl_mon. + CompletedBufferNode* cbn = new CompletedBufferNode; + cbn->buf = buf; + cbn->next = NULL; + cbn->index = index; + if (_completed_buffers_tail == NULL) { + assert(_completed_buffers_head == NULL, "Well-formedness"); + _completed_buffers_head = cbn; + _completed_buffers_tail = cbn; + } else { + _completed_buffers_tail->next = cbn; + _completed_buffers_tail = cbn; + } + _n_completed_buffers++; + + if (!_process_completed && + _n_completed_buffers == _process_completed_threshold) { + _process_completed = true; + if (_notify_when_complete) + _cbl_mon->notify_all(); + } + debug_only(assert_completed_buffer_list_len_correct_locked()); + _cbl_mon->unlock(); +} + +int PtrQueueSet::completed_buffers_list_length() { + int n = 0; + CompletedBufferNode* cbn = _completed_buffers_head; + while (cbn != NULL) { + n++; + cbn = cbn->next; + } + return n; +} + +void PtrQueueSet::assert_completed_buffer_list_len_correct() { + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + assert_completed_buffer_list_len_correct_locked(); +} + +void PtrQueueSet::assert_completed_buffer_list_len_correct_locked() { + guarantee((size_t)completed_buffers_list_length() == _n_completed_buffers, + "Completed buffer length is wrong."); +} + +void PtrQueueSet::set_buffer_size(size_t sz) { + assert(_sz == 0 && sz > 0, "Should be called only once."); + _sz = sz * oopSize; +} + +void PtrQueueSet::set_process_completed_threshold(size_t sz) { + _process_completed_threshold = sz; +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/ptrQueue.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,229 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// There are various techniques that require threads to be able to log +// addresses. 
For example, a generational write barrier might log +// the addresses of modified old-generation objects. This type supports +// this operation. + +class PtrQueueSet; + +class PtrQueue: public CHeapObj { + +protected: + // The ptr queue set to which this queue belongs. + PtrQueueSet* _qset; + + // Whether updates should be logged. + bool _active; + + // The buffer. + void** _buf; + // The index at which an object was last enqueued. Starts at "_sz" + // (indicating an empty buffer) and goes towards zero. + size_t _index; + + // The size of the buffer. + size_t _sz; + + // If true, the queue is permanent, and doesn't need to deallocate + // its buffer in the destructor (since that obtains a lock which may not + // be legally locked by then. + bool _perm; + + // If there is a lock associated with this buffer, this is that lock. + Mutex* _lock; + + PtrQueueSet* qset() { return _qset; } + +public: + // Initialize this queue to contain a null buffer, and be part of the + // given PtrQueueSet. + PtrQueue(PtrQueueSet*, bool perm = false); + // Release any contained resources. + ~PtrQueue(); + + // Associate a lock with a ptr queue. + void set_lock(Mutex* lock) { _lock = lock; } + + void reset() { if (_buf != NULL) _index = _sz; } + + // Enqueues the given "obj". + void enqueue(void* ptr) { + if (!_active) return; + else enqueue_known_active(ptr); + } + + inline void handle_zero_index(); + void locking_enqueue_completed_buffer(void** buf); + + void enqueue_known_active(void* ptr); + + size_t size() { + assert(_sz >= _index, "Invariant."); + return _buf == NULL ? 0 : _sz - _index; + } + + // Set the "active" property of the queue to "b". An enqueue to an + // inactive thread is a no-op. Setting a queue to inactive resets its + // log to the empty state. + void set_active(bool b) { + _active = b; + if (!b && _buf != NULL) { + _index = _sz; + } else if (b && _buf != NULL) { + assert(_index == _sz, "invariant: queues are empty when activated."); + } + } + + static int byte_index_to_index(int ind) { + assert((ind % oopSize) == 0, "Invariant."); + return ind / oopSize; + } + + static int index_to_byte_index(int byte_ind) { + return byte_ind * oopSize; + } + + // To support compiler. + static ByteSize byte_offset_of_index() { + return byte_offset_of(PtrQueue, _index); + } + static ByteSize byte_width_of_index() { return in_ByteSize(sizeof(size_t)); } + + static ByteSize byte_offset_of_buf() { + return byte_offset_of(PtrQueue, _buf); + } + static ByteSize byte_width_of_buf() { return in_ByteSize(sizeof(void*)); } + + static ByteSize byte_offset_of_active() { + return byte_offset_of(PtrQueue, _active); + } + static ByteSize byte_width_of_active() { return in_ByteSize(sizeof(bool)); } + +}; + +// A PtrQueueSet represents resources common to a set of pointer queues. +// In particular, the individual queues allocate buffers from this shared +// set, and return completed buffers to the set. +// All these variables are are protected by the TLOQ_CBL_mon. XXX ??? +class PtrQueueSet: public CHeapObj { + +protected: + + class CompletedBufferNode: public CHeapObj { + public: + void** buf; + size_t index; + CompletedBufferNode* next; + CompletedBufferNode() : buf(NULL), + index(0), next(NULL){ } + }; + + Monitor* _cbl_mon; // Protects the fields below. 
+ CompletedBufferNode* _completed_buffers_head; + CompletedBufferNode* _completed_buffers_tail; + size_t _n_completed_buffers; + size_t _process_completed_threshold; + volatile bool _process_completed; + + // This (and the interpretation of the first element as a "next" + // pointer) are protected by the TLOQ_FL_lock. + Mutex* _fl_lock; + void** _buf_free_list; + size_t _buf_free_list_sz; + + // The size of all buffers in the set. + size_t _sz; + + bool _all_active; + + // If true, notify_all on _cbl_mon when the threshold is reached. + bool _notify_when_complete; + + // Maximum number of elements allowed on completed queue: after that, + // enqueuer does the work itself. Zero indicates no maximum. + int _max_completed_queue; + + int completed_buffers_list_length(); + void assert_completed_buffer_list_len_correct_locked(); + void assert_completed_buffer_list_len_correct(); + +protected: + // A mutator thread does the the work of processing a buffer. + // Returns "true" iff the work is complete (and the buffer may be + // deallocated). + virtual bool mut_process_buffer(void** buf) { + ShouldNotReachHere(); + return false; + } + +public: + // Create an empty ptr queue set. + PtrQueueSet(bool notify_when_complete = false); + + // Because of init-order concerns, we can't pass these as constructor + // arguments. + void initialize(Monitor* cbl_mon, Mutex* fl_lock, + int max_completed_queue = 0) { + _max_completed_queue = max_completed_queue; + assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?"); + _cbl_mon = cbl_mon; _fl_lock = fl_lock; + } + + // Return an empty oop array of size _sz (required to be non-zero). + void** allocate_buffer(); + + // Return an empty buffer to the free list. The "buf" argument is + // required to be a pointer to the head of an array of length "_sz". + void deallocate_buffer(void** buf); + + // Declares that "buf" is a complete buffer. + void enqueue_complete_buffer(void** buf, size_t index = 0, + bool ignore_max_completed = false); + + bool completed_buffers_exist_dirty() { + return _n_completed_buffers > 0; + } + + bool process_completed_buffers() { return _process_completed; } + + bool active() { return _all_active; } + + // Set the buffer size. Should be called before any "enqueue" operation + // can be called. And should only be called once. + void set_buffer_size(size_t sz); + + // Get the buffer size. + size_t buffer_size() { return _sz; } + + // Set the number of completed buffers that triggers log processing. + void set_process_completed_threshold(size_t sz); + + // Must only be called at a safe point. Indicates that the buffer free + // list size may be reduced, if that is deemed desirable. + void reduce_free_list(); + + size_t completed_buffers_num() { return _n_completed_buffers; } +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/ptrQueue.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,41 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +void PtrQueue::handle_zero_index() { + assert(0 == _index, "Precondition."); + // This thread records the full buffer and allocates a new one (while + // holding the lock if there is one). + void** buf = _buf; + _buf = qset()->allocate_buffer(); + _sz = qset()->buffer_size(); + _index = _sz; + assert(0 <= _index && _index <= _sz, "Invariant."); + if (buf != NULL) { + if (_lock) { + locking_enqueue_completed_buffer(buf); + } else { + qset()->enqueue_complete_buffer(buf); + } + } +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/satbQueue.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,160 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +# include "incls/_precompiled.incl" +# include "incls/_satbQueue.cpp.incl" + +void ObjPtrQueue::apply_closure(ObjectClosure* cl) { + if (_buf != NULL) { + apply_closure_to_buffer(cl, _buf, _index, _sz); + _index = _sz; + } +} + +void ObjPtrQueue::apply_closure_to_buffer(ObjectClosure* cl, + void** buf, size_t index, size_t sz) { + if (cl == NULL) return; + for (size_t i = index; i < sz; i += oopSize) { + oop obj = (oop)buf[byte_index_to_index((int)i)]; + // There can be NULL entries because of destructors. 
+ if (obj != NULL) { + cl->do_object(obj); + } + } +} +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list +#endif // _MSC_VER + + +SATBMarkQueueSet::SATBMarkQueueSet() : + PtrQueueSet(), + _closure(NULL), _par_closures(NULL), + _shared_satb_queue(this, true /*perm*/) +{} + +void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, + int max_completed_queue, + Mutex* lock) { + PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue); + _shared_satb_queue.set_lock(lock); + if (ParallelGCThreads > 0) { + _par_closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads); + } +} + + +void SATBMarkQueueSet::handle_zero_index_for_thread(JavaThread* t) { + t->satb_mark_queue().handle_zero_index(); +} + +void SATBMarkQueueSet::set_active_all_threads(bool b) { + _all_active = b; + for(JavaThread* t = Threads::first(); t; t = t->next()) { + t->satb_mark_queue().set_active(b); + } +} + +void SATBMarkQueueSet::set_closure(ObjectClosure* closure) { + _closure = closure; +} + +void SATBMarkQueueSet::set_par_closure(int i, ObjectClosure* par_closure) { + assert(ParallelGCThreads > 0 && _par_closures != NULL, "Precondition"); + _par_closures[i] = par_closure; +} + +void SATBMarkQueueSet::iterate_closure_all_threads() { + for(JavaThread* t = Threads::first(); t; t = t->next()) { + t->satb_mark_queue().apply_closure(_closure); + } + shared_satb_queue()->apply_closure(_closure); +} + +void SATBMarkQueueSet::par_iterate_closure_all_threads(int worker) { + SharedHeap* sh = SharedHeap::heap(); + int parity = sh->strong_roots_parity(); + + for(JavaThread* t = Threads::first(); t; t = t->next()) { + if (t->claim_oops_do(true, parity)) { + t->satb_mark_queue().apply_closure(_par_closures[worker]); + } + } + // We'll have worker 0 do this one. + if (worker == 0) { + shared_satb_queue()->apply_closure(_par_closures[0]); + } +} + +bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par, + int worker) { + CompletedBufferNode* nd = NULL; + { + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + if (_completed_buffers_head != NULL) { + nd = _completed_buffers_head; + _completed_buffers_head = nd->next; + if (_completed_buffers_head == NULL) _completed_buffers_tail = NULL; + _n_completed_buffers--; + if (_n_completed_buffers == 0) _process_completed = false; + } + } + ObjectClosure* cl = (par ? _par_closures[worker] : _closure); + if (nd != NULL) { + ObjPtrQueue::apply_closure_to_buffer(cl, nd->buf, 0, _sz); + deallocate_buffer(nd->buf); + delete nd; + return true; + } else { + return false; + } +} + +void SATBMarkQueueSet::abandon_partial_marking() { + CompletedBufferNode* buffers_to_delete = NULL; + { + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + while (_completed_buffers_head != NULL) { + CompletedBufferNode* nd = _completed_buffers_head; + _completed_buffers_head = nd->next; + nd->next = buffers_to_delete; + buffers_to_delete = nd; + } + _completed_buffers_tail = NULL; + _n_completed_buffers = 0; + debug_only(assert_completed_buffer_list_len_correct_locked()); + } + while (buffers_to_delete != NULL) { + CompletedBufferNode* nd = buffers_to_delete; + buffers_to_delete = nd->next; + deallocate_buffer(nd->buf); + delete nd; + } + assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); + // So we can safely manipulate these queues. 
+ for (JavaThread* t = Threads::first(); t; t = t->next()) { + t->satb_mark_queue().reset(); + } + shared_satb_queue()->reset(); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/satbQueue.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,105 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class ObjectClosure; +class JavaThread; + +// A ptrQueue whose elements are "oops", pointers to object heads. +class ObjPtrQueue: public PtrQueue { +public: + ObjPtrQueue(PtrQueueSet* qset_, bool perm = false) : + PtrQueue(qset_, perm) + {} + // Apply the closure to all elements, and reset the index to make the + // buffer empty. + void apply_closure(ObjectClosure* cl); + + // Apply the closure to all elements of "buf", down to "index" (inclusive.) + static void apply_closure_to_buffer(ObjectClosure* cl, + void** buf, size_t index, size_t sz); + +}; + + + +class SATBMarkQueueSet: public PtrQueueSet { + ObjectClosure* _closure; + ObjectClosure** _par_closures; // One per ParGCThread. + + ObjPtrQueue _shared_satb_queue; + + // Utility function to support sequential and parallel versions. If + // "par" is true, then "worker" is the par thread id; if "false", worker + // is ignored. + bool apply_closure_to_completed_buffer_work(bool par, int worker); + + +public: + SATBMarkQueueSet(); + + void initialize(Monitor* cbl_mon, Mutex* fl_lock, + int max_completed_queue = 0, + Mutex* lock = NULL); + + static void handle_zero_index_for_thread(JavaThread* t); + + // Apply "set_active(b)" to all thread tloq's. Should be called only + // with the world stopped. + void set_active_all_threads(bool b); + + // Register "blk" as "the closure" for all queues. Only one such closure + // is allowed. The "apply_closure_to_completed_buffer" method will apply + // this closure to a completed buffer, and "iterate_closure_all_threads" + // applies it to partially-filled buffers (the latter should only be done + // with the world stopped). + void set_closure(ObjectClosure* closure); + // Set the parallel closures: pointer is an array of pointers to + // closures, one for each parallel GC thread. + void set_par_closure(int i, ObjectClosure* closure); + + // If there is a registered closure for buffers, apply it to all entries + // in all currently-active buffers. This should only be applied at a + // safepoint. (Currently must not be called in parallel; this should + // change in the future.) 
+ void iterate_closure_all_threads(); + // Parallel version of the above. + void par_iterate_closure_all_threads(int worker); + + // If there exists some completed buffer, pop it, then apply the + // registered closure to all its elements, and return true. If no + // completed buffers exist, return false. + bool apply_closure_to_completed_buffer() { + return apply_closure_to_completed_buffer_work(false, 0); + } + // Parallel version of the above. + bool par_apply_closure_to_completed_buffer(int worker) { + return apply_closure_to_completed_buffer_work(true, worker); + } + + ObjPtrQueue* shared_satb_queue() { return &_shared_satb_queue; } + + // If a marking is being abandoned, reset any unprocessed log buffers. + void abandon_partial_marking(); + +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/sparsePRT.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/sparsePRT.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,530 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_sparsePRT.cpp.incl" + +#define SPARSE_PRT_VERBOSE 0 + +#define UNROLL_CARD_LOOPS 1 + +void SparsePRT::init_iterator(SparsePRTIter* sprt_iter) { + sprt_iter->init(this); +} + +void SparsePRTEntry::init(short region_ind) { + _region_ind = region_ind; + _next_index = NullEntry; +#if UNROLL_CARD_LOOPS + assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll."); + _cards[0] = NullEntry; + _cards[1] = NullEntry; + _cards[2] = NullEntry; + _cards[3] = NullEntry; +#else + for (int i = 0; i < CardsPerEntry; i++) _cards[i] = NullEntry; +#endif +} + +bool SparsePRTEntry::contains_card(short card_index) const { +#if UNROLL_CARD_LOOPS + assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll."); + if (_cards[0] == card_index) return true; + if (_cards[1] == card_index) return true; + if (_cards[2] == card_index) return true; + if (_cards[3] == card_index) return true; +#else + for (int i = 0; i < CardsPerEntry; i++) { + if (_cards[i] == card_index) return true; + } +#endif + // Otherwise, we're full. + return false; +} + +int SparsePRTEntry::num_valid_cards() const { + int sum = 0; +#if UNROLL_CARD_LOOPS + assert(CardsPerEntry == 4, "Assumption. 
If changes, un-unroll."); + if (_cards[0] != NullEntry) sum++; + if (_cards[1] != NullEntry) sum++; + if (_cards[2] != NullEntry) sum++; + if (_cards[3] != NullEntry) sum++; +#else + for (int i = 0; i < CardsPerEntry; i++) { + if (_cards[i] != NulLEntry) sum++; + } +#endif + // Otherwise, we're full. + return sum; +} + +SparsePRTEntry::AddCardResult SparsePRTEntry::add_card(short card_index) { +#if UNROLL_CARD_LOOPS + assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll."); + short c = _cards[0]; + if (c == card_index) return found; + if (c == NullEntry) { _cards[0] = card_index; return added; } + c = _cards[1]; + if (c == card_index) return found; + if (c == NullEntry) { _cards[1] = card_index; return added; } + c = _cards[2]; + if (c == card_index) return found; + if (c == NullEntry) { _cards[2] = card_index; return added; } + c = _cards[3]; + if (c == card_index) return found; + if (c == NullEntry) { _cards[3] = card_index; return added; } +#else + for (int i = 0; i < CardsPerEntry; i++) { + short c = _cards[i]; + if (c == card_index) return found; + if (c == NullEntry) { _cards[i] = card_index; return added; } + } +#endif + // Otherwise, we're full. + return overflow; +} + +void SparsePRTEntry::copy_cards(short* cards) const { +#if UNROLL_CARD_LOOPS + assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll."); + cards[0] = _cards[0]; + cards[1] = _cards[1]; + cards[2] = _cards[2]; + cards[3] = _cards[3]; +#else + for (int i = 0; i < CardsPerEntry; i++) { + cards[i] = _cards[i]; + } +#endif +} + +void SparsePRTEntry::copy_cards(SparsePRTEntry* e) const { + copy_cards(&e->_cards[0]); +} + +// ---------------------------------------------------------------------- + +RSHashTable::RSHashTable(size_t capacity) : + _capacity(capacity), _capacity_mask(capacity-1), + _occupied_entries(0), _occupied_cards(0), + _entries(NEW_C_HEAP_ARRAY(SparsePRTEntry, capacity)), + _buckets(NEW_C_HEAP_ARRAY(short, capacity)), + _next_deleted(NULL), _deleted(false), + _free_list(NullEntry), _free_region(0) +{ + clear(); +} + +RSHashTable::~RSHashTable() { + if (_entries != NULL) { + FREE_C_HEAP_ARRAY(SparsePRTEntry, _entries); + _entries = NULL; + } + if (_buckets != NULL) { + FREE_C_HEAP_ARRAY(short, _buckets); + _buckets = NULL; + } +} + +void RSHashTable::clear() { + _occupied_entries = 0; + _occupied_cards = 0; + guarantee(_entries != NULL, "INV"); + guarantee(_buckets != NULL, "INV"); + // This will put -1 == NullEntry in the key field of all entries. 
+ memset(_entries, -1, _capacity * sizeof(SparsePRTEntry)); + memset(_buckets, -1, _capacity * sizeof(short)); + _free_list = NullEntry; + _free_region = 0; +} + +bool RSHashTable::add_card(short region_ind, short card_index) { + SparsePRTEntry* e = entry_for_region_ind_create(region_ind); + assert(e != NULL && e->r_ind() == region_ind, + "Postcondition of call above."); + SparsePRTEntry::AddCardResult res = e->add_card(card_index); + if (res == SparsePRTEntry::added) _occupied_cards++; +#if SPARSE_PRT_VERBOSE + gclog_or_tty->print_cr(" after add_card[%d]: valid-cards = %d.", + pointer_delta(e, _entries, sizeof(SparsePRTEntry)), + e->num_valid_cards()); +#endif + assert(e->num_valid_cards() > 0, "Postcondition"); + return res != SparsePRTEntry::overflow; +} + +bool RSHashTable::get_cards(short region_ind, short* cards) { + short ind = (short) (region_ind & capacity_mask()); + short cur_ind = _buckets[ind]; + SparsePRTEntry* cur; + while (cur_ind != NullEntry && + (cur = entry(cur_ind))->r_ind() != region_ind) { + cur_ind = cur->next_index(); + } + + if (cur_ind == NullEntry) return false; + // Otherwise... + assert(cur->r_ind() == region_ind, "Postcondition of loop + test above."); + assert(cur->num_valid_cards() > 0, "Inv"); + cur->copy_cards(cards); + return true; +} + +bool RSHashTable::delete_entry(short region_ind) { + short ind = (short) (region_ind & capacity_mask()); + short* prev_loc = &_buckets[ind]; + short cur_ind = *prev_loc; + SparsePRTEntry* cur; + while (cur_ind != NullEntry && + (cur = entry(cur_ind))->r_ind() != region_ind) { + prev_loc = cur->next_index_addr(); + cur_ind = *prev_loc; + } + + if (cur_ind == NullEntry) return false; + // Otherwise, splice out "cur". + *prev_loc = cur->next_index(); + _occupied_cards -= cur->num_valid_cards(); + free_entry(cur_ind); + _occupied_entries--; + return true; +} + +SparsePRTEntry* RSHashTable::entry_for_region_ind(short region_ind) const { + assert(occupied_entries() < capacity(), "Precondition"); + short ind = (short) (region_ind & capacity_mask()); + short cur_ind = _buckets[ind]; + SparsePRTEntry* cur; + // XXX + // int k = 0; + while (cur_ind != NullEntry && + (cur = entry(cur_ind))->r_ind() != region_ind) { + /* + k++; + if (k > 10) { + gclog_or_tty->print_cr("RSHashTable::entry_for_region_ind(%d): " + "k = %d, cur_ind = %d.", region_ind, k, cur_ind); + if (k >= 1000) { + while (1) ; + } + } + */ + cur_ind = cur->next_index(); + } + + if (cur_ind != NullEntry) { + assert(cur->r_ind() == region_ind, "Loop postcondition + test"); + return cur; + } else { + return NULL; + } +} + +SparsePRTEntry* RSHashTable::entry_for_region_ind_create(short region_ind) { + SparsePRTEntry* res = entry_for_region_ind(region_ind); + if (res == NULL) { + short new_ind = alloc_entry(); + assert(0 <= new_ind && (size_t)new_ind < capacity(), "There should be room."); + res = entry(new_ind); + res->init(region_ind); + // Insert at front. 
+ short ind = (short) (region_ind & capacity_mask()); + res->set_next_index(_buckets[ind]); + _buckets[ind] = new_ind; + _occupied_entries++; + } + return res; +} + +short RSHashTable::alloc_entry() { + short res; + if (_free_list != NullEntry) { + res = _free_list; + _free_list = entry(res)->next_index(); + return res; + } else if ((size_t) _free_region+1 < capacity()) { + res = _free_region; + _free_region++; + return res; + } else { + return NullEntry; + } +} + + +void RSHashTable::free_entry(short fi) { + entry(fi)->set_next_index(_free_list); + _free_list = fi; +} + + +void RSHashTable::add_entry(SparsePRTEntry* e) { + assert(e->num_valid_cards() > 0, "Precondition."); + SparsePRTEntry* e2 = entry_for_region_ind_create(e->r_ind()); + e->copy_cards(e2); + _occupied_cards += e2->num_valid_cards(); + assert(e2->num_valid_cards() > 0, "Postcondition."); +} + +RSHashTable* RSHashTable::_head_deleted_list = NULL; + +void RSHashTable::add_to_deleted_list(RSHashTable* rsht) { + assert(!rsht->deleted(), "Should delete only once."); + rsht->set_deleted(true); + RSHashTable* hd = _head_deleted_list; + while (true) { + rsht->_next_deleted = hd; + RSHashTable* res = + (RSHashTable*) + Atomic::cmpxchg_ptr(rsht, &_head_deleted_list, hd); + if (res == hd) return; + else hd = res; + } +} + +RSHashTable* RSHashTable::get_from_deleted_list() { + RSHashTable* hd = _head_deleted_list; + while (hd != NULL) { + RSHashTable* next = hd->next_deleted(); + RSHashTable* res = + (RSHashTable*) + Atomic::cmpxchg_ptr(next, &_head_deleted_list, hd); + if (res == hd) { + hd->set_next_deleted(NULL); + hd->set_deleted(false); + return hd; + } else { + hd = res; + } + } + return NULL; +} + +short /* RSHashTable:: */ RSHashTableIter::find_first_card_in_list() { + short res; + while (_bl_ind != RSHashTable::NullEntry) { + res = _rsht->entry(_bl_ind)->card(0); + if (res != SparsePRTEntry::NullEntry) { + return res; + } else { + _bl_ind = _rsht->entry(_bl_ind)->next_index(); + } + } + // Otherwise, none found: + return SparsePRTEntry::NullEntry; +} + +size_t /* RSHashTable:: */ RSHashTableIter::compute_card_ind(short ci) { + return + _heap_bot_card_ind + + (_rsht->entry(_bl_ind)->r_ind() * CardsPerRegion) + + ci; +} + +bool /* RSHashTable:: */ RSHashTableIter::has_next(size_t& card_index) { + _card_ind++; + short ci; + if (_card_ind < SparsePRTEntry::CardsPerEntry && + ((ci = _rsht->entry(_bl_ind)->card(_card_ind)) != + SparsePRTEntry::NullEntry)) { + card_index = compute_card_ind(ci); + return true; + } + // Otherwise, must find the next valid entry. + _card_ind = 0; + + if (_bl_ind != RSHashTable::NullEntry) { + _bl_ind = _rsht->entry(_bl_ind)->next_index(); + ci = find_first_card_in_list(); + if (ci != SparsePRTEntry::NullEntry) { + card_index = compute_card_ind(ci); + return true; + } + } + // If we didn't return above, must go to the next non-null table index. + _tbl_ind++; + while ((size_t)_tbl_ind < _rsht->capacity()) { + _bl_ind = _rsht->_buckets[_tbl_ind]; + ci = find_first_card_in_list(); + if (ci != SparsePRTEntry::NullEntry) { + card_index = compute_card_ind(ci); + return true; + } + // Otherwise, try next entry. + _tbl_ind++; + } + // Otherwise, there were no entry. 
+ return false; +} + +bool RSHashTable::contains_card(short region_index, short card_index) const { + SparsePRTEntry* e = entry_for_region_ind(region_index); + return (e != NULL && e->contains_card(card_index)); +} + +size_t RSHashTable::mem_size() const { + return sizeof(this) + capacity() * (sizeof(SparsePRTEntry) + sizeof(short)); +} + + +// ---------------------------------------------------------------------- + +SparsePRT* SparsePRT::_head_expanded_list = NULL; + +void SparsePRT::add_to_expanded_list(SparsePRT* sprt) { + // We could expand multiple times in a pause -- only put on list once. + if (sprt->expanded()) return; + sprt->set_expanded(true); + SparsePRT* hd = _head_expanded_list; + while (true) { + sprt->_next_expanded = hd; + SparsePRT* res = + (SparsePRT*) + Atomic::cmpxchg_ptr(sprt, &_head_expanded_list, hd); + if (res == hd) return; + else hd = res; + } +} + +SparsePRT* SparsePRT::get_from_expanded_list() { + SparsePRT* hd = _head_expanded_list; + while (hd != NULL) { + SparsePRT* next = hd->next_expanded(); + SparsePRT* res = + (SparsePRT*) + Atomic::cmpxchg_ptr(next, &_head_expanded_list, hd); + if (res == hd) { + hd->set_next_expanded(NULL); + return hd; + } else { + hd = res; + } + } + return NULL; +} + + +void SparsePRT::cleanup_all() { + // First clean up all expanded tables so they agree on next and cur. + SparsePRT* sprt = get_from_expanded_list(); + while (sprt != NULL) { + sprt->cleanup(); + sprt = get_from_expanded_list(); + } + // Now delete all deleted RSHashTables. + RSHashTable* rsht = RSHashTable::get_from_deleted_list(); + while (rsht != NULL) { +#if SPARSE_PRT_VERBOSE + gclog_or_tty->print_cr("About to delete RSHT " PTR_FORMAT ".", rsht); +#endif + delete rsht; + rsht = RSHashTable::get_from_deleted_list(); + } +} + + +SparsePRT::SparsePRT(HeapRegion* hr) : + _expanded(false), _next_expanded(NULL) +{ + _cur = new RSHashTable(InitialCapacity); + _next = _cur; +} + +SparsePRT::~SparsePRT() { + assert(_next != NULL && _cur != NULL, "Inv"); + if (_cur != _next) { delete _cur; } + delete _next; +} + + +size_t SparsePRT::mem_size() const { + // We ignore "_cur" here, because it either = _next, or else it is + // on the deleted list. + return sizeof(this) + _next->mem_size(); +} + +bool SparsePRT::add_card(short region_id, short card_index) { +#if SPARSE_PRT_VERBOSE + gclog_or_tty->print_cr(" Adding card %d from region %d to region %d sparse.", + card_index, region_id, _hr->hrs_index()); +#endif + if (_next->occupied_entries() * 2 > _next->capacity()) { + expand(); + } + return _next->add_card(region_id, card_index); +} + +bool SparsePRT::get_cards(short region_id, short* cards) { + return _next->get_cards(region_id, cards); +} + +bool SparsePRT::delete_entry(short region_id) { + return _next->delete_entry(region_id); +} + +void SparsePRT::clear() { + // If they differ, _next is bigger then cur, so next has no chance of + // being the initial size. + if (_next != _cur) { + delete _next; + } + + if (_cur->capacity() != InitialCapacity) { + delete _cur; + _cur = new RSHashTable(InitialCapacity); + } else { + _cur->clear(); + } + _next = _cur; +} + +void SparsePRT::cleanup() { + // Make sure that the current and next tables agree. (Another mechanism + // takes care of deleting now-unused tables.) 
+ _cur = _next; +} + +void SparsePRT::expand() { + RSHashTable* last = _next; + _next = new RSHashTable(last->capacity() * 2); + +#if SPARSE_PRT_VERBOSE + gclog_or_tty->print_cr(" Expanded sparse table for %d to %d.", + _hr->hrs_index(), _next->capacity()); +#endif + for (size_t i = 0; i < last->capacity(); i++) { + SparsePRTEntry* e = last->entry((int)i); + if (e->valid_entry()) { +#if SPARSE_PRT_VERBOSE + gclog_or_tty->print_cr(" During expansion, transferred entry for %d.", + e->r_ind()); +#endif + _next->add_entry(e); + } + } + if (last != _cur) + RSHashTable::add_to_deleted_list(last); + add_to_expanded_list(this); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/sparsePRT.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/sparsePRT.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,308 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Sparse remembered set for a heap region (the "owning" region). Maps +// indices of other regions to short sequences of cards in the other region +// that might contain pointers into the owner region. + +// These tables only expand while they are accessed in parallel -- +// deletions may be done in single-threaded code. This allows us to allow +// unsynchronized reads/iterations, as long as expansions caused by +// insertions only enqueue old versions for deletions, but do not delete +// old versions synchronously. + + +class SparsePRTEntry { +public: + enum SomePublicConstants { + CardsPerEntry = (short)4, + NullEntry = (short)-1, + DeletedEntry = (short)-2 + }; + +private: + short _region_ind; + short _next_index; + short _cards[CardsPerEntry]; + +public: + + // Set the region_ind to the given value, and delete all cards. + inline void init(short region_ind); + + short r_ind() const { return _region_ind; } + bool valid_entry() const { return r_ind() >= 0; } + void set_r_ind(short rind) { _region_ind = rind; } + + short next_index() const { return _next_index; } + short* next_index_addr() { return &_next_index; } + void set_next_index(short ni) { _next_index = ni; } + + // Returns "true" iff the entry contains the given card index. + inline bool contains_card(short card_index) const; + + // Returns the number of non-NULL card entries. + inline int num_valid_cards() const; + + // Requires that the entry not contain the given card index. 
If there is
+  // space available, add the given card index to the entry and return
+  // "added"; otherwise, return "overflow" to indicate that the entry is full.
+  enum AddCardResult {
+    overflow,
+    found,
+    added
+  };
+  inline AddCardResult add_card(short card_index);
+
+  // Copy the current entry's cards into "cards".
+  inline void copy_cards(short* cards) const;
+  // Copy the current entry's cards into the "_cards" array of "e."
+  inline void copy_cards(SparsePRTEntry* e) const;
+
+  inline short card(int i) const { return _cards[i]; }
+};
+
+
+class RSHashTable : public CHeapObj {
+
+  friend class RSHashTableIter;
+
+  enum SomePrivateConstants {
+    NullEntry = -1
+  };
+
+  size_t _capacity;
+  size_t _capacity_mask;
+  size_t _occupied_entries;
+  size_t _occupied_cards;
+
+  SparsePRTEntry* _entries;
+  short* _buckets;
+  short  _free_region;
+  short  _free_list;
+
+  static RSHashTable* _head_deleted_list;
+  RSHashTable* _next_deleted;
+  RSHashTable* next_deleted() { return _next_deleted; }
+  void set_next_deleted(RSHashTable* rsht) { _next_deleted = rsht; }
+  bool _deleted;
+  void set_deleted(bool b) { _deleted = b; }
+
+  // Requires that the caller hold a lock preventing parallel modifying
+  // operations, and that the table be less than completely full.  If
+  // an entry for "region_ind" is already in the table, finds it and
+  // returns its address; otherwise returns "NULL."
+  SparsePRTEntry* entry_for_region_ind(short region_ind) const;
+
+  // Requires that the caller hold a lock preventing parallel modifying
+  // operations, and that the table be less than completely full.  If
+  // an entry for "region_ind" is already in the table, finds it and
+  // returns its address; otherwise allocates, initializes, inserts and
+  // returns a new entry for "region_ind".
+  SparsePRTEntry* entry_for_region_ind_create(short region_ind);
+
+  // Returns the index of the next free entry in "_entries".
+  short alloc_entry();
+  // Declares the entry "fi" to be free.  (It must have already been
+  // deleted from any bucket lists.)
+  void free_entry(short fi);
+
+public:
+  RSHashTable(size_t capacity);
+  ~RSHashTable();
+
+  // Attempts to ensure that the given card_index in the given region is in
+  // the sparse table.  If successful (because the card was already
+  // present, or because it was successfully added) returns "true".
+  // Otherwise, returns "false" to indicate that the addition would
+  // overflow the entry for the region.  The caller must transfer these
+  // entries to a larger-capacity representation.
+  bool add_card(short region_id, short card_index);
+
+  bool get_cards(short region_id, short* cards);
+  bool delete_entry(short region_id);
+
+  bool contains_card(short region_id, short card_index) const;
+
+  void add_entry(SparsePRTEntry* e);
+
+  void clear();
+
+  size_t capacity() const      { return _capacity; }
+  size_t capacity_mask() const { return _capacity_mask; }
+  size_t occupied_entries() const { return _occupied_entries; }
+  size_t occupied_cards() const   { return _occupied_cards; }
+  size_t mem_size() const;
+  bool deleted() { return _deleted; }
+
+  SparsePRTEntry* entry(int i) const { return &_entries[i]; }
+
+  void print();
+
+  static void add_to_deleted_list(RSHashTable* rsht);
+  static RSHashTable* get_from_deleted_list();
+
+
+};
+
+  // ValueObj because will be embedded in HRRS iterator.
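+// Iterates over the cards recorded in an RSHashTable, yielding absolute card
+// indices of the form
+//   _heap_bot_card_ind + region_index * CardsPerRegion + card_offset.
+// A minimal usage sketch ("rsht" stands for any RSHashTable being read and
+// "process_card" for caller code; both are placeholders):
+//   RSHashTableIter iter(heap_bot_card_ind);
+//   iter.init(rsht);
+//   size_t card_index;
+//   while (iter.has_next(card_index)) process_card(card_index);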
+class RSHashTableIter: public CHeapObj {
+  short _tbl_ind;
+  short _bl_ind;
+  short _card_ind;
+  RSHashTable* _rsht;
+  size_t _heap_bot_card_ind;
+
+  enum SomePrivateConstants {
+    CardsPerRegion = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift
+  };
+
+  // If the bucket list pointed to by _bl_ind contains a card, sets
+  // _bl_ind to the index of that entry, and returns the card.
+  // Otherwise, returns SparsePRTEntry::NullEntry.
+  short find_first_card_in_list();
+  // Computes the proper card index for the card whose offset in the
+  // current region (as indicated by _bl_ind) is "ci".
+  // This is subject to errors when there is iteration concurrent with
+  // modification, but these errors should be benign.
+  size_t compute_card_ind(short ci);
+
+ public:
+  RSHashTableIter(size_t heap_bot_card_ind) :
+    _tbl_ind(RSHashTable::NullEntry),
+    _bl_ind(RSHashTable::NullEntry),
+    _card_ind((SparsePRTEntry::CardsPerEntry-1)),
+    _rsht(NULL),
+    _heap_bot_card_ind(heap_bot_card_ind)
+  {}
+
+  void init(RSHashTable* rsht) {
+    _rsht = rsht;
+    _tbl_ind = -1; // So that first increment gets to 0.
+    _bl_ind = RSHashTable::NullEntry;
+    _card_ind = (SparsePRTEntry::CardsPerEntry-1);
+  }
+
+  bool has_next(size_t& card_index);
+
+  };
+
+// Concurrent access to a SparsePRT must be serialized by some external
+// mutex.
+
+class SparsePRTIter;
+
+class SparsePRT : public CHeapObj {
+  // Iterations are done on the _cur hash table, since they only need to
+  // see entries visible at the start of a collection pause.
+  // All other operations are done using the _next hash table.
+  RSHashTable* _cur;
+  RSHashTable* _next;
+
+  HeapRegion* _hr;
+
+  enum SomeAdditionalPrivateConstants {
+    InitialCapacity = 16
+  };
+
+  void expand();
+
+  bool _expanded;
+
+  bool expanded() { return _expanded; }
+  void set_expanded(bool b) { _expanded = b; }
+
+  SparsePRT* _next_expanded;
+
+  SparsePRT* next_expanded() { return _next_expanded; }
+  void set_next_expanded(SparsePRT* nxt) { _next_expanded = nxt; }
+
+
+  static SparsePRT* _head_expanded_list;
+
+public:
+  SparsePRT(HeapRegion* hr);
+
+  ~SparsePRT();
+
+  size_t occupied() const { return _next->occupied_cards(); }
+  size_t mem_size() const;
+
+  // Attempts to ensure that the given card_index in the given region is in
+  // the sparse table.  If successful (because the card was already
+  // present, or because it was successfully added) returns "true".
+  // Otherwise, returns "false" to indicate that the addition would
+  // overflow the entry for the region.  The caller must transfer these
+  // entries to a larger-capacity representation.
+  bool add_card(short region_id, short card_index);
+
+  // If the table holds an entry for "region_ind", copies its
+  // cards into "cards", which must be an array of length at least
+  // "CardsPerEntry", and returns "true"; otherwise, returns "false".
+  bool get_cards(short region_ind, short* cards);
+
+  // If there is an entry for "region_ind", removes it and returns "true";
+  // otherwise returns "false."
+  bool delete_entry(short region_ind);
+
+  // Clear the table, and reinitialize to initial capacity.
+  void clear();
+
+  // Ensure that "_cur" and "_next" point to the same table.
+  void cleanup();
+
+  // Clean up all tables on the expanded list.  Called single threaded.
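+  // For each SparsePRT on the expanded list this makes "_cur" and "_next"
+  // agree again, and then frees every RSHashTable queued on the deleted list.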
+ static void cleanup_all(); + RSHashTable* next() const { return _next; } + + + void init_iterator(SparsePRTIter* sprt_iter); + + static void add_to_expanded_list(SparsePRT* sprt); + static SparsePRT* get_from_expanded_list(); + + bool contains_card(short region_id, short card_index) const { + return _next->contains_card(region_id, card_index); + } + +#if 0 + void verify_is_cleared(); + void print(); +#endif +}; + + +class SparsePRTIter: public /* RSHashTable:: */RSHashTableIter { +public: + SparsePRTIter(size_t heap_bot_card_ind) : + /* RSHashTable:: */RSHashTableIter(heap_bot_card_ind) + {} + + void init(const SparsePRT* sprt) { + RSHashTableIter::init(sprt->next()); + } + bool has_next(size_t& card_index) { + return RSHashTableIter::has_next(card_index); + } +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/survRateGroup.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/survRateGroup.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,264 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_survRateGroup.cpp.incl" + +SurvRateGroup::SurvRateGroup(G1CollectorPolicy* g1p, + const char* name, + size_t summary_surv_rates_len) : + _g1p(g1p), _name(name), + _all_regions_allocated(0), + _curr_length(0), _scan_only_prefix(0), _setup_seq_num(0), + _array_length(0), _surv_rate(NULL), _accum_surv_rate_pred(NULL), + _accum_surv_rate(0.0), _surv_rate_pred(NULL), _last_pred(0.0), + _summary_surv_rates_len(summary_surv_rates_len), + _summary_surv_rates_max_len(0), + _summary_surv_rates(NULL) { + + // the following will set up the arrays with length 1 + _curr_length = 1; + stop_adding_regions(); + guarantee( _array_length == 1, "invariant" ); + guarantee( _surv_rate_pred[0] != NULL, "invariant" ); + _surv_rate_pred[0]->add(0.4); + all_surviving_words_recorded(false); + _curr_length = 0; + + if (summary_surv_rates_len > 0) { + size_t length = summary_surv_rates_len; + _summary_surv_rates = NEW_C_HEAP_ARRAY(NumberSeq*, length); + if (_summary_surv_rates == NULL) { + vm_exit_out_of_memory(sizeof(NumberSeq*) * length, + "Not enough space for surv rate summary"); + } + for (size_t i = 0; i < length; ++i) + _summary_surv_rates[i] = new NumberSeq(); + } + + start_adding_regions(); +} + +void +SurvRateGroup::start_adding_regions() { + _setup_seq_num = _array_length; + _curr_length = _scan_only_prefix; + _accum_surv_rate = 0.0; + +#if 0 + gclog_or_tty->print_cr("start adding regions, seq num %d, length %d", + _setup_seq_num, _curr_length); +#endif // 0 +} + +void +SurvRateGroup::stop_adding_regions() { + size_t length = _curr_length; + +#if 0 + gclog_or_tty->print_cr("stop adding regions, length %d", length); +#endif // 0 + + if (length > _array_length) { + double* old_surv_rate = _surv_rate; + double* old_accum_surv_rate_pred = _accum_surv_rate_pred; + TruncatedSeq** old_surv_rate_pred = _surv_rate_pred; + + _surv_rate = NEW_C_HEAP_ARRAY(double, length); + if (_surv_rate == NULL) { + vm_exit_out_of_memory(sizeof(double) * length, + "Not enough space for surv rate array."); + } + _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, length); + if (_accum_surv_rate_pred == NULL) { + vm_exit_out_of_memory(sizeof(double) * length, + "Not enough space for accum surv rate pred array."); + } + _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, length); + if (_surv_rate == NULL) { + vm_exit_out_of_memory(sizeof(TruncatedSeq*) * length, + "Not enough space for surv rate pred array."); + } + + for (size_t i = 0; i < _array_length; ++i) + _surv_rate_pred[i] = old_surv_rate_pred[i]; + +#if 0 + gclog_or_tty->print_cr("stop adding regions, new seqs %d to %d", + _array_length, length - 1); +#endif // 0 + + for (size_t i = _array_length; i < length; ++i) { + _surv_rate_pred[i] = new TruncatedSeq(10); + // _surv_rate_pred[i]->add(last_pred); + } + + _array_length = length; + + if (old_surv_rate != NULL) + FREE_C_HEAP_ARRAY(double, old_surv_rate); + if (old_accum_surv_rate_pred != NULL) + FREE_C_HEAP_ARRAY(double, old_accum_surv_rate_pred); + if (old_surv_rate_pred != NULL) + FREE_C_HEAP_ARRAY(NumberSeq*, old_surv_rate_pred); + } + + for (size_t i = 0; i < _array_length; ++i) + _surv_rate[i] = 0.0; +} + +double +SurvRateGroup::accum_surv_rate(size_t adjustment) { + // we might relax this one in the future... 
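+  // With "adjustment" == 1 the returned value also includes the predicted
+  // survival rate of the age group at index _curr_length + 1.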
+ guarantee( adjustment == 0 || adjustment == 1, "pre-condition" ); + + double ret = _accum_surv_rate; + if (adjustment > 0) { + TruncatedSeq* seq = get_seq(_curr_length+1); + double surv_rate = _g1p->get_new_prediction(seq); + ret += surv_rate; + } + + return ret; +} + +int +SurvRateGroup::next_age_index() { + TruncatedSeq* seq = get_seq(_curr_length); + double surv_rate = _g1p->get_new_prediction(seq); + _accum_surv_rate += surv_rate; + + ++_curr_length; + return (int) ++_all_regions_allocated; +} + +void +SurvRateGroup::record_scan_only_prefix(size_t scan_only_prefix) { + guarantee( scan_only_prefix <= _curr_length, "pre-condition" ); + _scan_only_prefix = scan_only_prefix; +} + +void +SurvRateGroup::record_surviving_words(int age_in_group, size_t surv_words) { + guarantee( 0 <= age_in_group && (size_t) age_in_group < _curr_length, + "pre-condition" ); + guarantee( _surv_rate[age_in_group] <= 0.00001, + "should only update each slot once" ); + + double surv_rate = (double) surv_words / (double) HeapRegion::GrainWords; + _surv_rate[age_in_group] = surv_rate; + _surv_rate_pred[age_in_group]->add(surv_rate); + if ((size_t)age_in_group < _summary_surv_rates_len) { + _summary_surv_rates[age_in_group]->add(surv_rate); + if ((size_t)(age_in_group+1) > _summary_surv_rates_max_len) + _summary_surv_rates_max_len = age_in_group+1; + } +} + +void +SurvRateGroup::all_surviving_words_recorded(bool propagate) { + if (propagate && _curr_length > 0) { // conservative + double surv_rate = _surv_rate_pred[_curr_length-1]->last(); + +#if 0 + gclog_or_tty->print_cr("propagating %1.2lf from %d to %d", + surv_rate, _curr_length, _array_length - 1); +#endif // 0 + + for (size_t i = _curr_length; i < _array_length; ++i) { + guarantee( _surv_rate[i] <= 0.00001, + "the slot should not have been updated" ); + _surv_rate_pred[i]->add(surv_rate); + } + } + + double accum = 0.0; + double pred = 0.0; + for (size_t i = 0; i < _array_length; ++i) { + pred = _g1p->get_new_prediction(_surv_rate_pred[i]); + if (pred > 1.0) pred = 1.0; + accum += pred; + _accum_surv_rate_pred[i] = accum; + // gclog_or_tty->print_cr("age %3d, accum %10.2lf", i, accum); + } + _last_pred = pred; +} + +#ifndef PRODUCT +void +SurvRateGroup::print() { + gclog_or_tty->print_cr("Surv Rate Group: %s (%d entries, %d scan-only)", + _name, _curr_length, _scan_only_prefix); + for (size_t i = 0; i < _curr_length; ++i) { + gclog_or_tty->print_cr(" age %4d surv rate %6.2lf %% pred %6.2lf %%%s", + i, _surv_rate[i] * 100.0, + _g1p->get_new_prediction(_surv_rate_pred[i]) * 100.0, + (i < _scan_only_prefix) ? 
" S-O" : " "); + } +} + +void +SurvRateGroup::print_surv_rate_summary() { + size_t length = _summary_surv_rates_max_len; + if (length == 0) + return; + + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr("%s Rate Summary (for up to age %d)", _name, length-1); + gclog_or_tty->print_cr(" age range survival rate (avg) samples (avg)"); + gclog_or_tty->print_cr(" ---------------------------------------------------------"); + + size_t index = 0; + size_t limit = MIN2((int) length, 10); + while (index < limit) { + gclog_or_tty->print_cr(" %4d %6.2lf%% %6.2lf", + index, _summary_surv_rates[index]->avg() * 100.0, + (double) _summary_surv_rates[index]->num()); + ++index; + } + + gclog_or_tty->print_cr(" ---------------------------------------------------------"); + + int num = 0; + double sum = 0.0; + int samples = 0; + while (index < length) { + ++num; + sum += _summary_surv_rates[index]->avg() * 100.0; + samples += _summary_surv_rates[index]->num(); + ++index; + + if (index == length || num % 10 == 0) { + gclog_or_tty->print_cr(" %4d .. %4d %6.2lf%% %6.2lf", + (index-1) / 10 * 10, index-1, sum / (double) num, + (double) samples / (double) num); + sum = 0.0; + num = 0; + samples = 0; + } + } + + gclog_or_tty->print_cr(" ---------------------------------------------------------"); +} +#endif // PRODUCT diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/survRateGroup.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/survRateGroup.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,102 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +class G1CollectorPolicy; + +class SurvRateGroup : public CHeapObj { +private: + G1CollectorPolicy* _g1p; + const char* _name; + + size_t _array_length; + double* _surv_rate; + double* _accum_surv_rate_pred; + double _last_pred; + double _accum_surv_rate; + TruncatedSeq** _surv_rate_pred; + NumberSeq** _summary_surv_rates; + size_t _summary_surv_rates_len; + size_t _summary_surv_rates_max_len; + + int _all_regions_allocated; + size_t _curr_length; + size_t _scan_only_prefix; + size_t _setup_seq_num; + +public: + SurvRateGroup(G1CollectorPolicy* g1p, + const char* name, + size_t summary_surv_rates_len); + void start_adding_regions(); + void stop_adding_regions(); + void record_scan_only_prefix(size_t scan_only_prefix); + void record_surviving_words(int age_in_group, size_t surv_words); + void all_surviving_words_recorded(bool propagate); + const char* name() { return _name; } + + size_t region_num() { return _curr_length; } + size_t scan_only_length() { return _scan_only_prefix; } + double accum_surv_rate_pred(int age) { + assert(age >= 0, "must be"); + if ((size_t)age < _array_length) + return _accum_surv_rate_pred[age]; + else { + double diff = (double) (age - _array_length + 1); + return _accum_surv_rate_pred[_array_length-1] + diff * _last_pred; + } + } + + double accum_surv_rate(size_t adjustment); + + TruncatedSeq* get_seq(size_t age) { + guarantee( 0 <= age, "pre-condition" ); + if (age >= _setup_seq_num) { + guarantee( _setup_seq_num > 0, "invariant" ); + age = _setup_seq_num-1; + } + TruncatedSeq* seq = _surv_rate_pred[age]; + guarantee( seq != NULL, "invariant" ); + return seq; + } + + int next_age_index(); + int age_in_group(int age_index) { + int ret = (int) (_all_regions_allocated - age_index); + assert( ret >= 0, "invariant" ); + return ret; + } + int recalculate_age_index(int age_index) { + int new_age_index = (int) _scan_only_prefix - age_in_group(age_index); + guarantee( new_age_index >= 0, "invariant" ); + return new_age_index; + } + void finished_recalculating_age_indexes() { + _all_regions_allocated = (int) _scan_only_prefix; + } + +#ifndef PRODUCT + void print(); + void print_surv_rate_summary(); +#endif // PRODUCT +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/vm_operations_g1.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,79 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_vm_operations_g1.cpp.incl" + +void VM_G1CollectForAllocation::doit() { + JvmtiGCForAllocationMarker jgcm; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + _res = g1h->satisfy_failed_allocation(_size); + assert(g1h->is_in_or_null(_res), "result not in heap"); +} + +void VM_G1CollectFull::doit() { + JvmtiGCFullMarker jgcm; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + GCCauseSetter x(g1h, _gc_cause); + g1h->do_full_collection(false /* clear_all_soft_refs */); +} + +void VM_G1IncCollectionPause::doit() { + JvmtiGCForAllocationMarker jgcm; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + GCCauseSetter x(g1h, GCCause::_g1_inc_collection_pause); + g1h->do_collection_pause_at_safepoint(NULL); +} + +void VM_G1PopRegionCollectionPause::doit() { + JvmtiGCForAllocationMarker jgcm; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + g1h->do_collection_pause_at_safepoint(_pop_region); +} + + +void VM_CGC_Operation::doit() { + gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps); + TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); + TraceTime t(_printGCMessage, PrintGC, true, gclog_or_tty); + SharedHeap* sh = SharedHeap::heap(); + // This could go away if CollectedHeap gave access to _gc_is_active... + if (sh != NULL) { + IsGCActiveMark x; + _cl->do_void(); + } else { + _cl->do_void(); + } +} + +bool VM_CGC_Operation::doit_prologue() { + Heap_lock->lock(); + SharedHeap::heap()->_thread_holds_heap_lock_for_gc = true; + return true; +} + +void VM_CGC_Operation::doit_epilogue() { + SharedHeap::heap()->_thread_holds_heap_lock_for_gc = false; + Heap_lock->unlock(); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/vm_operations_g1.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,114 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// VM_operations for the G1 collector. 
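+// Each of these operations is executed in the VM thread via its doit()
+// method; the intended hierarchy is: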
+// VM_GC_Operation: +// - VM_CGC_Operation +// - VM_G1CollectFull +// - VM_G1CollectForAllocation +// - VM_G1IncCollectionPause +// - VM_G1PopRegionCollectionPause + +class VM_G1CollectFull: public VM_GC_Operation { + private: + public: + VM_G1CollectFull(int gc_count_before, + GCCause::Cause gc_cause) + : VM_GC_Operation(gc_count_before) + { + _gc_cause = gc_cause; + } + ~VM_G1CollectFull() {} + virtual VMOp_Type type() const { return VMOp_G1CollectFull; } + virtual void doit(); + virtual const char* name() const { + return "full garbage-first collection"; + } +}; + +class VM_G1CollectForAllocation: public VM_GC_Operation { + private: + HeapWord* _res; + size_t _size; // size of object to be allocated + public: + VM_G1CollectForAllocation(size_t size, int gc_count_before) + : VM_GC_Operation(gc_count_before) { + _size = size; + _res = NULL; + } + ~VM_G1CollectForAllocation() {} + virtual VMOp_Type type() const { return VMOp_G1CollectForAllocation; } + virtual void doit(); + virtual const char* name() const { + return "garbage-first collection to satisfy allocation"; + } + HeapWord* result() { return _res; } +}; + +class VM_G1IncCollectionPause: public VM_GC_Operation { + public: + VM_G1IncCollectionPause(int gc_count_before) : + VM_GC_Operation(gc_count_before) {} + virtual VMOp_Type type() const { return VMOp_G1IncCollectionPause; } + virtual void doit(); + virtual const char* name() const { + return "garbage-first incremental collection pause"; + } +}; + +class VM_G1PopRegionCollectionPause: public VM_GC_Operation { + HeapRegion* _pop_region; + public: + VM_G1PopRegionCollectionPause(int gc_count_before, HeapRegion* pop_region) : + VM_GC_Operation(gc_count_before), + _pop_region(pop_region) + {} + virtual VMOp_Type type() const { return VMOp_G1PopRegionCollectionPause; } + virtual void doit(); + virtual const char* name() const { + return "garbage-first popular region collection pause"; + } +}; + +// Concurrent GC stop-the-world operations such as initial and final mark; +// consider sharing these with CMS's counterparts. 
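+// VM_CGC_Operation's prologue acquires the Heap_lock on behalf of the
+// concurrent GC thread and its epilogue releases it, so the closure invoked
+// from doit() runs while the heap is locked.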
+class VM_CGC_Operation: public VM_Operation { + VoidClosure* _cl; + const char* _printGCMessage; + public: + VM_CGC_Operation(VoidClosure* cl, const char *printGCMsg) : + _cl(cl), + _printGCMessage(printGCMsg) + {} + + ~VM_CGC_Operation() {} + + virtual VMOp_Type type() const { return VMOp_CGC_Operation; } + virtual void doit(); + virtual bool doit_prologue(); + virtual void doit_epilogue(); + virtual const char* name() const { + return "concurrent gc"; + } +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep --- a/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep Thu Jun 05 15:57:56 2008 -0700 @@ -123,17 +123,6 @@ compactingPermGenGen.cpp concurrentMarkSweepGeneration.inline.hpp -concurrentGCThread.cpp concurrentGCThread.hpp -concurrentGCThread.cpp init.hpp -concurrentGCThread.cpp instanceRefKlass.hpp -concurrentGCThread.cpp interfaceSupport.hpp -concurrentGCThread.cpp java.hpp -concurrentGCThread.cpp javaCalls.hpp -concurrentGCThread.cpp oop.inline.hpp -concurrentGCThread.cpp systemDictionary.hpp - -concurrentGCThread.hpp thread.hpp - concurrentMarkSweepGeneration.cpp cardTableRS.hpp concurrentMarkSweepGeneration.cpp cmsAdaptiveSizePolicy.hpp concurrentMarkSweepGeneration.cpp cmsCollectorPolicy.hpp @@ -165,7 +154,7 @@ concurrentMarkSweepGeneration.cpp vmCMSOperations.hpp concurrentMarkSweepGeneration.cpp vmThread.hpp -concurrentMarkSweepGeneration.hpp bitMap.hpp +concurrentMarkSweepGeneration.hpp bitMap.inline.hpp concurrentMarkSweepGeneration.hpp freeBlockDictionary.hpp concurrentMarkSweepGeneration.hpp gSpaceCounters.hpp concurrentMarkSweepGeneration.hpp gcStats.hpp diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/includeDB_gc_g1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/includeDB_gc_g1 Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,349 @@ +// +// Copyright 2004-2006 Sun Microsystems, Inc. All Rights Reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, +// CA 95054 USA or visit www.sun.com if you need additional information or +// have any questions. +// +// + +// NOTE: DO NOT CHANGE THIS COPYRIGHT TO NEW STYLE - IT WILL BREAK makeDeps! 
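+// Each non-comment line below pairs a file with one of the files it depends
+// on; makeDeps consumes these pairs to generate the build dependencies for
+// the new G1 sources.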
+ +bufferingOopClosure.hpp genOopClosures.hpp +bufferingOopClosure.hpp generation.hpp +bufferingOopClosure.hpp os.hpp + +cardTableRS.cpp concurrentMark.hpp +cardTableRS.cpp g1SATBCardTableModRefBS.hpp + +collectionSetChooser.cpp g1CollectedHeap.hpp +collectionSetChooser.cpp g1CollectorPolicy.hpp +collectionSetChooser.cpp collectionSetChooser.hpp + +collectionSetChooser.hpp heapRegion.hpp +collectionSetChooser.hpp growableArray.hpp + +concurrentG1Refine.cpp atomic.hpp +concurrentG1Refine.cpp concurrentG1Refine.hpp +concurrentG1Refine.cpp concurrentG1RefineThread.hpp +concurrentG1Refine.cpp copy.hpp +concurrentG1Refine.cpp g1CollectedHeap.hpp +concurrentG1Refine.cpp g1RemSet.hpp + +concurrentG1Refine.hpp globalDefinitions.hpp + +concurrentG1RefineThread.cpp concurrentG1Refine.hpp +concurrentG1RefineThread.cpp concurrentG1RefineThread.hpp +concurrentG1RefineThread.cpp g1CollectedHeap.hpp +concurrentG1RefineThread.cpp g1CollectorPolicy.hpp +concurrentG1RefineThread.cpp handles.inline.hpp +concurrentG1RefineThread.cpp mutexLocker.hpp +concurrentG1RefineThread.cpp resourceArea.hpp + +concurrentG1RefineThread.hpp concurrentGCThread.hpp +concurrentG1RefineThread.hpp coTracker.hpp + +concurrentMark.cpp concurrentMark.hpp +concurrentMark.cpp concurrentMarkThread.inline.hpp +concurrentMark.cpp g1CollectedHeap.inline.hpp +concurrentMark.cpp g1CollectorPolicy.hpp +concurrentMark.cpp g1RemSet.hpp +concurrentMark.cpp gcOverheadReporter.hpp +concurrentMark.cpp genOopClosures.inline.hpp +concurrentMark.cpp heapRegionRemSet.hpp +concurrentMark.cpp heapRegionSeq.inline.hpp +concurrentMark.cpp handles.inline.hpp +concurrentMark.cpp java.hpp +concurrentMark.cpp oop.inline.hpp +concurrentMark.cpp referencePolicy.hpp +concurrentMark.cpp resourceArea.hpp +concurrentMark.cpp symbolTable.hpp + +concurrentMark.hpp coTracker.hpp +concurrentMark.hpp heapRegion.hpp +concurrentMark.hpp taskqueue.hpp + +concurrentMarkThread.cpp concurrentMarkThread.inline.hpp +concurrentMarkThread.cpp g1CollectedHeap.inline.hpp +concurrentMarkThread.cpp g1CollectorPolicy.hpp +concurrentMarkThread.cpp g1MMUTracker.hpp +concurrentMarkThread.cpp resourceArea.hpp +concurrentMarkThread.cpp vm_operations_g1.hpp +concurrentMarkThread.cpp vmThread.hpp + +concurrentMarkThread.hpp concurrentGCThread.hpp + +concurrentMarkThread.inline.hpp concurrentMark.hpp +concurrentMarkThread.inline.hpp concurrentMarkThread.hpp + +concurrentZFThread.cpp concurrentZFThread.hpp +concurrentZFThread.cpp heapRegion.hpp +concurrentZFThread.cpp g1CollectedHeap.inline.hpp +concurrentZFThread.cpp copy.hpp +concurrentZFThread.cpp mutexLocker.hpp +concurrentZFThread.cpp space.inline.hpp + +concurrentZFThread.hpp concurrentGCThread.hpp +concurrentZFThread.hpp coTracker.hpp + +dirtyCardQueue.cpp atomic.hpp +dirtyCardQueue.cpp dirtyCardQueue.hpp +dirtyCardQueue.cpp heapRegionRemSet.hpp +dirtyCardQueue.cpp mutexLocker.hpp +dirtyCardQueue.cpp ptrQueue.inline.hpp +dirtyCardQueue.cpp safepoint.hpp +dirtyCardQueue.cpp thread.hpp +dirtyCardQueue.cpp thread_.inline.hpp +dirtyCardQueue.cpp workgroup.hpp + +dirtyCardQueue.hpp allocation.hpp +dirtyCardQueue.hpp ptrQueue.hpp + +g1BlockOffsetTable.cpp g1BlockOffsetTable.inline.hpp +g1BlockOffsetTable.cpp java.hpp +g1BlockOffsetTable.cpp oop.inline.hpp +g1BlockOffsetTable.cpp space.hpp + +g1BlockOffsetTable.hpp globalDefinitions.hpp +g1BlockOffsetTable.hpp memRegion.hpp +g1BlockOffsetTable.hpp virtualspace.hpp + +g1BlockOffsetTable.inline.hpp g1BlockOffsetTable.hpp +g1BlockOffsetTable.inline.hpp space.hpp + +g1CollectedHeap.cpp 
aprofiler.hpp +g1CollectedHeap.cpp bufferingOopClosure.hpp +g1CollectedHeap.cpp concurrentG1Refine.hpp +g1CollectedHeap.cpp concurrentG1RefineThread.hpp +g1CollectedHeap.cpp concurrentMarkThread.inline.hpp +g1CollectedHeap.cpp concurrentZFThread.hpp +g1CollectedHeap.cpp g1CollectedHeap.inline.hpp +g1CollectedHeap.cpp g1CollectorPolicy.hpp +g1CollectedHeap.cpp g1MarkSweep.hpp +g1CollectedHeap.cpp g1RemSet.hpp +g1CollectedHeap.cpp g1OopClosures.inline.hpp +g1CollectedHeap.cpp genOopClosures.inline.hpp +g1CollectedHeap.cpp gcLocker.inline.hpp +g1CollectedHeap.cpp gcOverheadReporter.hpp +g1CollectedHeap.cpp generationSpec.hpp +g1CollectedHeap.cpp heapRegionRemSet.hpp +g1CollectedHeap.cpp heapRegionSeq.inline.hpp +g1CollectedHeap.cpp icBuffer.hpp +g1CollectedHeap.cpp isGCActiveMark.hpp +g1CollectedHeap.cpp oop.inline.hpp +g1CollectedHeap.cpp oop.pcgc.inline.hpp +g1CollectedHeap.cpp parGCAllocBuffer.hpp +g1CollectedHeap.cpp vm_operations_g1.hpp +g1CollectedHeap.cpp vmThread.hpp + +g1CollectedHeap.hpp barrierSet.hpp +g1CollectedHeap.hpp heapRegion.hpp +g1CollectedHeap.hpp memRegion.hpp +g1CollectedHeap.hpp sharedHeap.hpp + +g1CollectedHeap.inline.hpp concurrentMark.hpp +g1CollectedHeap.inline.hpp g1CollectedHeap.hpp +g1CollectedHeap.inline.hpp heapRegionSeq.hpp +g1CollectedHeap.inline.hpp taskqueue.hpp + +g1CollectorPolicy.cpp concurrentG1Refine.hpp +g1CollectorPolicy.cpp concurrentMark.hpp +g1CollectorPolicy.cpp concurrentMarkThread.inline.hpp +g1CollectorPolicy.cpp debug.hpp +g1CollectorPolicy.cpp java.hpp +g1CollectorPolicy.cpp g1CollectedHeap.hpp +g1CollectorPolicy.cpp g1CollectorPolicy.hpp +g1CollectorPolicy.cpp heapRegionRemSet.hpp +g1CollectorPolicy.cpp mutexLocker.hpp + +g1CollectorPolicy.hpp collectorPolicy.hpp +g1CollectorPolicy.hpp collectionSetChooser.hpp +g1CollectorPolicy.hpp g1MMUTracker.hpp + +g1_globals.cpp g1_globals.hpp + +g1_globals.hpp globals.hpp + +globals.cpp g1_globals.hpp +top.hpp g1_globals.hpp + +g1MarkSweep.cpp aprofiler.hpp +g1MarkSweep.cpp codeCache.hpp +g1MarkSweep.cpp events.hpp +g1MarkSweep.cpp fprofiler.hpp +g1MarkSweep.hpp g1CollectedHeap.hpp +g1MarkSweep.cpp g1MarkSweep.hpp +g1MarkSweep.cpp gcLocker.hpp +g1MarkSweep.cpp genCollectedHeap.hpp +g1MarkSweep.hpp heapRegion.hpp +g1MarkSweep.cpp icBuffer.hpp +g1MarkSweep.cpp instanceRefKlass.hpp +g1MarkSweep.cpp javaClasses.hpp +g1MarkSweep.cpp jvmtiExport.hpp +g1MarkSweep.cpp copy.hpp +g1MarkSweep.cpp modRefBarrierSet.hpp +g1MarkSweep.cpp oop.inline.hpp +g1MarkSweep.cpp referencePolicy.hpp +g1MarkSweep.cpp space.hpp +g1MarkSweep.cpp symbolTable.hpp +g1MarkSweep.cpp synchronizer.hpp +g1MarkSweep.cpp systemDictionary.hpp +g1MarkSweep.cpp thread.hpp +g1MarkSweep.cpp vmSymbols.hpp +g1MarkSweep.cpp vmThread.hpp + +g1MarkSweep.hpp generation.hpp +g1MarkSweep.hpp growableArray.hpp +g1MarkSweep.hpp markOop.hpp +g1MarkSweep.hpp genMarkSweep.hpp +g1MarkSweep.hpp oop.hpp +g1MarkSweep.hpp timer.hpp +g1MarkSweep.hpp universe.hpp + +g1OopClosures.inline.hpp concurrentMark.hpp +g1OopClosures.inline.hpp g1OopClosures.hpp +g1OopClosures.inline.hpp g1CollectedHeap.hpp +g1OopClosures.inline.hpp g1RemSet.hpp + +g1MMUTracker.cpp g1MMUTracker.hpp +g1MMUTracker.cpp ostream.hpp +g1MMUTracker.cpp mutexLocker.hpp + +g1MMUTracker.hpp debug.hpp + +g1RemSet.cpp bufferingOopClosure.hpp +g1RemSet.cpp concurrentG1Refine.hpp +g1RemSet.cpp concurrentG1RefineThread.hpp +g1RemSet.cpp g1BlockOffsetTable.inline.hpp +g1RemSet.cpp g1CollectedHeap.inline.hpp +g1RemSet.cpp g1CollectorPolicy.hpp +g1RemSet.cpp g1RemSet.inline.hpp +g1RemSet.cpp 
g1OopClosures.inline.hpp +g1RemSet.cpp heapRegionSeq.inline.hpp +g1RemSet.cpp intHisto.hpp +g1RemSet.cpp iterator.hpp +g1RemSet.cpp oop.inline.hpp + +g1RemSet.inline.hpp g1RemSet.hpp +g1RemSet.inline.hpp heapRegionRemSet.hpp + +g1SATBCardTableModRefBS.cpp g1SATBCardTableModRefBS.hpp +g1SATBCardTableModRefBS.cpp heapRegion.hpp +g1SATBCardTableModRefBS.cpp mutexLocker.hpp +g1SATBCardTableModRefBS.cpp thread.hpp +g1SATBCardTableModRefBS.cpp thread_.inline.hpp +g1SATBCardTableModRefBS.cpp satbQueue.hpp + +g1SATBCardTableModRefBS.hpp cardTableModRefBS.hpp +g1SATBCardTableModRefBS.hpp memRegion.hpp + +heapRegion.cpp concurrentZFThread.hpp +heapRegion.cpp g1BlockOffsetTable.inline.hpp +heapRegion.cpp g1CollectedHeap.inline.hpp +heapRegion.cpp g1OopClosures.inline.hpp +heapRegion.cpp genOopClosures.inline.hpp +heapRegion.cpp heapRegion.inline.hpp +heapRegion.cpp heapRegionRemSet.hpp +heapRegion.cpp heapRegionSeq.inline.hpp +heapRegion.cpp iterator.hpp +heapRegion.cpp oop.inline.hpp + +heapRegion.hpp space.hpp +heapRegion.hpp g1BlockOffsetTable.hpp +heapRegion.hpp watermark.hpp +heapRegion.hpp g1_specialized_oop_closures.hpp +heapRegion.hpp survRateGroup.hpp + +heapRegionRemSet.hpp sparsePRT.hpp + +heapRegionRemSet.cpp allocation.hpp +heapRegionRemSet.cpp bitMap.inline.hpp +heapRegionRemSet.cpp g1BlockOffsetTable.inline.hpp +heapRegionRemSet.cpp g1CollectedHeap.inline.hpp +heapRegionRemSet.cpp heapRegionRemSet.hpp +heapRegionRemSet.cpp heapRegionSeq.inline.hpp +heapRegionRemSet.cpp globalDefinitions.hpp +heapRegionRemSet.cpp space.inline.hpp + +heapRegionSeq.cpp allocation.hpp +heapRegionSeq.cpp g1CollectedHeap.hpp +heapRegionSeq.cpp heapRegionSeq.hpp + +heapRegionSeq.hpp growableArray.hpp +heapRegionSeq.hpp heapRegion.hpp + +heapRegionSeq.inline.hpp heapRegionSeq.hpp + +klass.hpp g1OopClosures.hpp + +ptrQueue.cpp allocation.hpp +ptrQueue.cpp allocation.inline.hpp +ptrQueue.cpp mutex.hpp +ptrQueue.cpp mutexLocker.hpp +ptrQueue.cpp ptrQueue.hpp +ptrQueue.cpp ptrQueue.inline.hpp +ptrQueue.cpp thread_.inline.hpp + +ptrQueue.hpp allocation.hpp +ptrQueue.hpp sizes.hpp + +ptrQueue.inline.hpp ptrQueue.hpp + +satbQueue.cpp allocation.inline.hpp +satbQueue.cpp mutexLocker.hpp +satbQueue.cpp ptrQueue.inline.hpp +satbQueue.cpp satbQueue.hpp +satbQueue.cpp sharedHeap.hpp +satbQueue.cpp thread.hpp + +satbQueue.hpp ptrQueue.hpp + +sparsePRT.cpp allocation.inline.hpp +sparsePRT.cpp cardTableModRefBS.hpp +sparsePRT.cpp heapRegion.hpp +sparsePRT.cpp heapRegionRemSet.hpp +sparsePRT.cpp mutexLocker.hpp +sparsePRT.cpp sparsePRT.hpp +sparsePRT.cpp space.inline.hpp + +sparsePRT.hpp allocation.hpp +sparsePRT.hpp cardTableModRefBS.hpp +sparsePRT.hpp globalDefinitions.hpp +sparsePRT.hpp heapRegion.hpp +sparsePRT.hpp mutex.hpp + +specialized_oop_closures.hpp g1_specialized_oop_closures.hpp + +survRateGroup.hpp numberSeq.hpp + +survRateGroup.cpp allocation.hpp +survRateGroup.cpp g1CollectedHeap.hpp +survRateGroup.cpp g1CollectorPolicy.hpp +survRateGroup.cpp heapRegion.hpp +survRateGroup.cpp survRateGroup.hpp + +thread.cpp concurrentMarkThread.inline.hpp + +universe.cpp g1CollectedHeap.hpp +universe.cpp g1CollectorPolicy.hpp + +vm_operations_g1.hpp vmGCOperations.hpp + +vm_operations_g1.cpp vm_operations_g1.hpp +vm_operations_g1.cpp g1CollectedHeap.hpp +vm_operations_g1.cpp isGCActiveMark.hpp diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/includeDB_gc_parallelScavenge --- a/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge Wed Jun 04 13:51:09 2008 -0700 +++ 
b/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge Thu Jun 05 15:57:56 2008 -0700 @@ -148,7 +148,6 @@ parallelScavengeHeap.hpp psYoungGen.hpp parallelScavengeHeap.hpp ostream.hpp -parMarkBitMap.cpp bitMap.hpp parMarkBitMap.cpp bitMap.inline.hpp parMarkBitMap.cpp oop.inline.hpp parMarkBitMap.cpp os.hpp @@ -157,7 +156,6 @@ parMarkBitMap.cpp parMarkBitMap.inline.hpp parMarkBitMap.cpp psParallelCompact.hpp -parMarkBitMap.hpp bitMap.hpp parMarkBitMap.hpp bitMap.inline.hpp parMarkBitMap.hpp psVirtualspace.hpp diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/includeDB_gc_shared --- a/src/share/vm/gc_implementation/includeDB_gc_shared Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/includeDB_gc_shared Thu Jun 05 15:57:56 2008 -0700 @@ -24,6 +24,23 @@ // NOTE: DO NOT CHANGE THIS COPYRIGHT TO NEW STYLE - IT WILL BREAK makeDeps! +concurrentGCThread.cpp concurrentGCThread.hpp +concurrentGCThread.cpp init.hpp +concurrentGCThread.cpp instanceRefKlass.hpp +concurrentGCThread.cpp interfaceSupport.hpp +concurrentGCThread.cpp java.hpp +concurrentGCThread.cpp javaCalls.hpp +concurrentGCThread.cpp oop.inline.hpp +concurrentGCThread.cpp systemDictionary.hpp + +concurrentGCThread.hpp thread.hpp + +coTracker.hpp globalDefinitions.hpp +coTracker.hpp numberSeq.hpp + +coTracker.cpp coTracker.hpp +coTracker.cpp os.hpp + allocationStats.cpp allocationStats.hpp allocationStats.cpp ostream.hpp @@ -37,6 +54,13 @@ gcAdaptivePolicyCounters.cpp resourceArea.hpp gcAdaptivePolicyCounters.cpp gcAdaptivePolicyCounters.hpp +gcOverheadReporter.cpp allocation.inline.hpp +gcOverheadReporter.cpp concurrentGCThread.hpp +gcOverheadReporter.cpp coTracker.hpp +gcOverheadReporter.cpp gcOverheadReporter.hpp +gcOverheadReporter.cpp ostream.hpp +gcOverheadReporter.cpp thread_.inline.hpp + gSpaceCounters.cpp generation.hpp gSpaceCounters.cpp resourceArea.hpp gSpaceCounters.cpp gSpaceCounters.hpp @@ -72,3 +96,5 @@ spaceCounters.hpp mutableSpace.hpp spaceCounters.hpp perfData.hpp spaceCounters.hpp generationCounters.hpp + +vmGCOperations.cpp g1CollectedHeap.hpp diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -192,16 +192,16 @@ }; inline ParMarkBitMap::ParMarkBitMap(): - _beg_bits(NULL, 0), - _end_bits(NULL, 0) + _beg_bits(), + _end_bits() { _region_start = 0; _virtual_space = 0; } inline ParMarkBitMap::ParMarkBitMap(MemRegion covered_region): - _beg_bits(NULL, 0), - _end_bits(NULL, 0) + _beg_bits(), + _end_bits() { initialize(covered_region); } @@ -325,7 +325,7 @@ inline size_t ParMarkBitMap::obj_size(idx_t beg_bit) const { - const idx_t end_bit = _end_bits.find_next_one_bit(beg_bit, size()); + const idx_t end_bit = _end_bits.get_next_one_offset_inline(beg_bit, size()); assert(is_marked(beg_bit), "obj not marked"); assert(end_bit < size(), "end bit missing"); return obj_size(beg_bit, end_bit); @@ -384,13 +384,13 @@ inline ParMarkBitMap::idx_t ParMarkBitMap::find_obj_beg(idx_t beg, idx_t end) const { - return _beg_bits.find_next_one_bit(beg, end); + return _beg_bits.get_next_one_offset_inline_aligned_right(beg, end); } inline ParMarkBitMap::idx_t ParMarkBitMap::find_obj_end(idx_t beg, idx_t end) const { - return _end_bits.find_next_one_bit(beg, end); + return _end_bits.get_next_one_offset_inline_aligned_right(beg, end); } inline 
HeapWord* diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -184,6 +184,20 @@ size_t tlab_capacity(Thread* thr) const; size_t unsafe_max_tlab_alloc(Thread* thr) const; + // Can a compiler initialize a new object without store barriers? + // This permission only extends from the creation of a new object + // via a TLAB up to the first subsequent safepoint. + virtual bool can_elide_tlab_store_barriers() const { + return true; + } + + // Can a compiler elide a store barrier when it writes + // a permanent oop into the heap? Applies when the compiler + // is storing x to the heap, where x->is_perm() is true. + virtual bool can_elide_permanent_oop_store_barriers() const { + return true; + } + void oop_iterate(OopClosure* cl); void object_iterate(ObjectClosure* cl); void permanent_oop_iterate(OopClosure* cl); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/shared/coTracker.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/coTracker.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,189 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_coTracker.cpp.incl" + +COTracker* COTracker::_head = NULL; +double COTracker::_cpu_number = -1.0; + +void +COTracker::resetPeriod(double now_sec, double vnow_sec) { + guarantee( _enabled, "invariant" ); + _period_start_time_sec = now_sec; + _period_start_vtime_sec = vnow_sec; +} + +void +COTracker::setConcOverhead(double time_stamp_sec, + double conc_overhead) { + guarantee( _enabled, "invariant" ); + _conc_overhead = conc_overhead; + _time_stamp_sec = time_stamp_sec; + if (conc_overhead > 0.001) + _conc_overhead_seq.add(conc_overhead); +} + +void +COTracker::reset(double starting_conc_overhead) { + guarantee( _enabled, "invariant" ); + double now_sec = os::elapsedTime(); + setConcOverhead(now_sec, starting_conc_overhead); +} + +void +COTracker::start() { + guarantee( _enabled, "invariant" ); + resetPeriod(os::elapsedTime(), os::elapsedVTime()); +} + +void +COTracker::update(bool force_end) { + assert( _enabled, "invariant" ); + double end_time_sec = os::elapsedTime(); + double elapsed_time_sec = end_time_sec - _period_start_time_sec; + if (force_end || elapsed_time_sec > _update_period_sec) { + // reached the end of the period + double end_vtime_sec = os::elapsedVTime(); + double elapsed_vtime_sec = end_vtime_sec - _period_start_vtime_sec; + + double conc_overhead = elapsed_vtime_sec / elapsed_time_sec; + + setConcOverhead(end_time_sec, conc_overhead); + resetPeriod(end_time_sec, end_vtime_sec); + } +} + +void +COTracker::updateForSTW(double start_sec, double end_sec) { + if (!_enabled) + return; + + // During a STW pause, no concurrent GC thread has done any + // work. So, we can safely adjust the start of the current period by + // adding the duration of the STW pause to it, so that the STW pause + // doesn't affect the reading of the concurrent overhead (it's + // basically like excluding the time of the STW pause from the + // concurrent overhead calculation). + + double stw_duration_sec = end_sec - start_sec; + guarantee( stw_duration_sec > 0.0, "invariant" ); + + if (outOfDate(start_sec)) + _conc_overhead = 0.0; + else + _time_stamp_sec = end_sec; + _period_start_time_sec += stw_duration_sec; + _conc_overhead_seq = NumberSeq(); + + guarantee( os::elapsedTime() > _period_start_time_sec, "invariant" ); +} + +double +COTracker::predConcOverhead() { + if (_enabled) { + // tty->print(" %1.2lf", _conc_overhead_seq.maximum()); + return _conc_overhead_seq.maximum(); + } else { + // tty->print(" DD"); + return 0.0; + } +} + +void +COTracker::resetPred() { + _conc_overhead_seq = NumberSeq(); +} + +COTracker::COTracker(int group) + : _enabled(false), + _group(group), + _period_start_time_sec(-1.0), + _period_start_vtime_sec(-1.0), + _conc_overhead(-1.0), + _time_stamp_sec(-1.0), + _next(NULL) { + // GCOverheadReportingPeriodMS indicates how frequently the + // concurrent overhead will be recorded by the GC Overhead + // Reporter. We want to take readings less often than that. If we + // took readings more often than some of them might be lost. 
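+  // Hence the 1.25 factor below: the update period is made 25% longer than
+  // the reporting period.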
+ _update_period_sec = ((double) GCOverheadReportingPeriodMS) / 1000.0 * 1.25; + _next = _head; + _head = this; + + if (_cpu_number < 0.0) + _cpu_number = (double) os::processor_count(); +} + +// statics + +void +COTracker::updateAllForSTW(double start_sec, double end_sec) { + for (COTracker* curr = _head; curr != NULL; curr = curr->_next) { + curr->updateForSTW(start_sec, end_sec); + } +} + +double +COTracker::totalConcOverhead(double now_sec) { + double total_conc_overhead = 0.0; + + for (COTracker* curr = _head; curr != NULL; curr = curr->_next) { + double conc_overhead = curr->concOverhead(now_sec); + total_conc_overhead += conc_overhead; + } + + return total_conc_overhead; +} + +double +COTracker::totalConcOverhead(double now_sec, + size_t group_num, + double* co_per_group) { + double total_conc_overhead = 0.0; + + for (size_t i = 0; i < group_num; ++i) + co_per_group[i] = 0.0; + + for (COTracker* curr = _head; curr != NULL; curr = curr->_next) { + size_t group = curr->_group; + assert( 0 <= group && group < group_num, "invariant" ); + double conc_overhead = curr->concOverhead(now_sec); + + co_per_group[group] += conc_overhead; + total_conc_overhead += conc_overhead; + } + + return total_conc_overhead; +} + +double +COTracker::totalPredConcOverhead() { + double total_pred_conc_overhead = 0.0; + for (COTracker* curr = _head; curr != NULL; curr = curr->_next) { + total_pred_conc_overhead += curr->predConcOverhead(); + curr->resetPred(); + } + return total_pred_conc_overhead / _cpu_number; +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/shared/coTracker.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/coTracker.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,181 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// COTracker keeps track of the concurrent overhead of a GC thread. + +// A thread that needs to be tracked must, itself, start up its +// tracker with the start() method and then call the update() method +// at regular intervals. What the tracker does is to calculate the +// concurrent overhead of a process at a given update period. The +// tracker starts and when is detects that it has exceeded the given +// period, it calculates the duration of the period in wall-clock time +// and the duration of the period in vtime (i.e. how much time the +// concurrent processes really took up during this period). 
The ratio +// of the latter over the former is the concurrent overhead of that +// process for that period over a single CPU. This overhead is stored +// on the tracker, "timestamped" with the wall-clock time of the end +// of the period. When the concurrent overhead of this process needs +// to be queried, this last "reading" provides a good approximation +// (we assume that the concurrent overhead of a particular thread +// stays largely constant over time). The timestamp is necessary to +// detect when the process has stopped working and the recorded +// reading hasn't been updated for some time. + +// Each concurrent GC thread is considered to be part of a "group" +// (i.e. any available concurrent marking threads are part of the +// "concurrent marking thread group"). A COTracker is associated with +// a single group at construction-time. It's up to each collector to +// decide how groups will be mapped to such an id (ids should start +// from 0 and be consecutive; there's a hardcoded max group num +// defined on the GCOverheadTracker class). The notion of a group has +// been introduced to be able to identify how much overhead was +// imposed by each group, instead of getting a single value that +// covers all concurrent overhead. + +class COTracker { +private: + // It indicates whether this tracker is enabled or not. When the + // tracker is disabled, then it returns 0.0 as the latest concurrent + // overhead and several methods (reset, start, and update) are not + // supposed to be called on it. This enabling / disabling facility + // is really provided to make a bit more explicit in the code when a + // particulary tracker of a processes that doesn't run all the time + // (e.g. concurrent marking) is supposed to be used and not it's not. + bool _enabled; + + // The ID of the group associated with this tracker. + int _group; + + // The update period of the tracker. A new value for the concurrent + // overhead of the associated process will be made at intervals no + // smaller than this. + double _update_period_sec; + + // The start times (both wall-block time and vtime) of the current + // interval. + double _period_start_time_sec; + double _period_start_vtime_sec; + + // Number seq of the concurrent overhead readings within a period + NumberSeq _conc_overhead_seq; + + // The latest reading of the concurrent overhead (over a single CPU) + // imposed by the associated concurrent thread, made available at + // the indicated wall-clock time. + double _conc_overhead; + double _time_stamp_sec; + + // The number of CPUs that the host machine has (for convenience + // really, as we'd have to keep translating it into a double) + static double _cpu_number; + + // Fields that keep a list of all trackers created. This is useful, + // since it allows us to sum up the concurrent overhead without + // having to write code for a specific collector to broadcast a + // request to all its concurrent processes. + COTracker* _next; + static COTracker* _head; + + // It indicates that a new period is starting by updating the + // _period_start_time_sec and _period_start_vtime_sec fields. + void resetPeriod(double now_sec, double vnow_sec); + // It updates the latest concurrent overhead reading, taken at a + // given wall-clock time. + void setConcOverhead(double time_stamp_sec, double conc_overhead); + + // It determines whether the time stamp of the latest concurrent + // overhead reading is out of date or not. 
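+  // (A reading older than 1.2x the update period is treated as stale.)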
+ bool outOfDate(double now_sec) { + // The latest reading is considered out of date if it was taken + // more than 1.2x the update period ago. + return (now_sec - _time_stamp_sec) > 1.2 * _update_period_sec; + } + +public: + // The constructor which associates the tracker with a group ID. + COTracker(int group); + + // Methods to enable / disable the tracker and query whether it is enabled. + void enable() { _enabled = true; } + void disable() { _enabled = false; } + bool enabled() { return _enabled; } + + // It resets the tracker and sets the concurrent overhead reading to be + // the given parameter and the associated time stamp to be now. + void reset(double starting_conc_overhead = 0.0); + // The tracker starts tracking. It should only be called from the + // concurrent thread that is tracked by this tracker. + void start(); + // It updates the tracker and, if the current period is longer than + // the update period, the concurrent overhead reading will be + // updated. force_end being true indicates that it's the last call + // to update() by this process before the tracker is disabled (the + // tracker can be re-enabled later if necessary). It should only be + // called from the concurrent thread that is tracked by this tracker + // and while the thread has joined the STS. + void update(bool force_end = false); + // It adjusts the contents of the tracker to take into account an STW + // pause. + void updateForSTW(double start_sec, double end_sec); + + // It returns the last concurrent overhead reading over a single + // CPU. If the reading is out of date, or the tracker is disabled, + // it returns 0.0. + double concCPUOverhead(double now_sec) { + if (!_enabled || outOfDate(now_sec)) + return 0.0; + else + return _conc_overhead; + } + + // It returns the last concurrent overhead reading over all CPUs + // that the host machine has. If the reading is out of date, or the + // tracker is disabled, it returns 0.0. + double concOverhead(double now_sec) { + return concCPUOverhead(now_sec) / _cpu_number; + } + + double predConcOverhead(); + + void resetPred(); + + // statics + + // It notifies all trackers about an STW pause. + static void updateAllForSTW(double start_sec, double end_sec); + + // It returns the sum of the concurrent overhead readings of all + // available (and enabled) trackers for the given time stamp. The + // overhead is over all the CPUs of the host machine. + + static double totalConcOverhead(double now_sec); + // Like the previous method, but it also sums up the overheads per + // group number. The length of the co_per_group array must be at + // least as large as group_num. + static double totalConcOverhead(double now_sec, + size_t group_num, + double* co_per_group); + + static double totalPredConcOverhead(); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/shared/concurrentGCThread.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,314 @@ +/* + * Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// CopyrightVersion 1.2 + +# include "incls/_precompiled.incl" +# include "incls/_concurrentGCThread.cpp.incl" + +bool ConcurrentGCThread::_should_terminate = false; +bool ConcurrentGCThread::_has_terminated = false; +int ConcurrentGCThread::_CGC_flag = CGC_nil; + +SuspendibleThreadSet ConcurrentGCThread::_sts; + +ConcurrentGCThread::ConcurrentGCThread() { + _sts.initialize(); +}; + +void ConcurrentGCThread::stopWorldAndDo(VoidClosure* op) { + MutexLockerEx x(Heap_lock, + Mutex::_no_safepoint_check_flag); + // warning("CGC: about to try stopping world"); + SafepointSynchronize::begin(); + // warning("CGC: successfully stopped world"); + op->do_void(); + SafepointSynchronize::end(); + // warning("CGC: successfully restarted world"); +} + +void ConcurrentGCThread::safepoint_synchronize() { + _sts.suspend_all(); +} + +void ConcurrentGCThread::safepoint_desynchronize() { + _sts.resume_all(); +} + +void ConcurrentGCThread::create_and_start() { + if (os::create_thread(this, os::cgc_thread)) { + // XXX: need to set this to low priority + // unless "aggressive mode" set; priority + // should be just less than that of VMThread. + os::set_priority(this, NearMaxPriority); + if (!_should_terminate && !DisableStartThread) { + os::start_thread(this); + } + } +} + +void ConcurrentGCThread::initialize_in_thread() { + this->record_stack_base_and_size(); + this->initialize_thread_local_storage(); + this->set_active_handles(JNIHandleBlock::allocate_block()); + // From this time Thread::current() should be working. + assert(this == Thread::current(), "just checking"); +} + +void ConcurrentGCThread::wait_for_universe_init() { + MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); + while (!is_init_completed() && !_should_terminate) { + CGC_lock->wait(Mutex::_no_safepoint_check_flag, 200); + } +} + +void ConcurrentGCThread::terminate() { + // Signal that it is terminated + { + MutexLockerEx mu(Terminator_lock, + Mutex::_no_safepoint_check_flag); + _has_terminated = true; + Terminator_lock->notify(); + } + + // Thread destructor usually does this..
+ ThreadLocalStorage::set_thread(NULL); +} + + +void SuspendibleThreadSet::initialize_work() { + MutexLocker x(STS_init_lock); + if (!_initialized) { + _m = new Monitor(Mutex::leaf, + "SuspendibleThreadSetLock", true); + _async = 0; + _async_stop = false; + _async_stopped = 0; + _initialized = true; + } +} + +void SuspendibleThreadSet::join() { + initialize(); + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); + while (_async_stop) _m->wait(Mutex::_no_safepoint_check_flag); + _async++; + assert(_async > 0, "Huh."); +} + +void SuspendibleThreadSet::leave() { + assert(_initialized, "Must be initialized."); + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); + _async--; + assert(_async >= 0, "Huh."); + if (_async_stop) _m->notify_all(); +} + +void SuspendibleThreadSet::yield(const char* id) { + assert(_initialized, "Must be initialized."); + if (_async_stop) { + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); + if (_async_stop) { + _async_stopped++; + assert(_async_stopped > 0, "Huh."); + if (_async_stopped == _async) { + if (ConcGCYieldTimeout > 0) { + double now = os::elapsedTime(); + guarantee((now - _suspend_all_start) * 1000.0 < + (double)ConcGCYieldTimeout, + "Long delay; whodunit?"); + } + } + _m->notify_all(); + while (_async_stop) _m->wait(Mutex::_no_safepoint_check_flag); + _async_stopped--; + assert(_async >= 0, "Huh"); + _m->notify_all(); + } + } +} + +void SuspendibleThreadSet::suspend_all() { + initialize(); // If necessary. + if (ConcGCYieldTimeout > 0) { + _suspend_all_start = os::elapsedTime(); + } + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); + assert(!_async_stop, "Only one at a time."); + _async_stop = true; + while (_async_stopped < _async) _m->wait(Mutex::_no_safepoint_check_flag); +} + +void SuspendibleThreadSet::resume_all() { + assert(_initialized, "Must be initialized."); + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); + assert(_async_stopped == _async, "Huh."); + _async_stop = false; + _m->notify_all(); +} + +static void _sltLoop(JavaThread* thread, TRAPS) { + SurrogateLockerThread* slt = (SurrogateLockerThread*)thread; + slt->loop(); +} + +SurrogateLockerThread::SurrogateLockerThread() : + JavaThread(&_sltLoop), + _monitor(Mutex::nonleaf, "SLTMonitor"), + _buffer(empty) +{} + +SurrogateLockerThread* SurrogateLockerThread::make(TRAPS) { + klassOop k = + SystemDictionary::resolve_or_fail(vmSymbolHandles::java_lang_Thread(), + true, CHECK_NULL); + instanceKlassHandle klass (THREAD, k); + instanceHandle thread_oop = klass->allocate_instance_handle(CHECK_NULL); + + const char thread_name[] = "Surrogate Locker Thread (CMS)"; + Handle string = java_lang_String::create_from_str(thread_name, CHECK_NULL); + + // Initialize thread_oop to put it into the system threadGroup + Handle thread_group (THREAD, Universe::system_thread_group()); + JavaValue result(T_VOID); + JavaCalls::call_special(&result, thread_oop, + klass, + vmSymbolHandles::object_initializer_name(), + vmSymbolHandles::threadgroup_string_void_signature(), + thread_group, + string, + CHECK_NULL); + + SurrogateLockerThread* res; + { + MutexLocker mu(Threads_lock); + res = new SurrogateLockerThread(); + + // At this point it may be possible that no osthread was created for the + // JavaThread due to lack of memory. We would have to throw an exception + // in that case. However, since this must work and we do not allow + // exceptions anyway, check and abort if this fails. 
+ if (res == NULL || res->osthread() == NULL) { + vm_exit_during_initialization("java.lang.OutOfMemoryError", + "unable to create new native thread"); + } + java_lang_Thread::set_thread(thread_oop(), res); + java_lang_Thread::set_priority(thread_oop(), NearMaxPriority); + java_lang_Thread::set_daemon(thread_oop()); + + res->set_threadObj(thread_oop()); + Threads::add(res); + Thread::start(res); + } + os::yield(); // This seems to help with initial start-up of SLT + return res; +} + +void SurrogateLockerThread::manipulatePLL(SLT_msg_type msg) { + MutexLockerEx x(&_monitor, Mutex::_no_safepoint_check_flag); + assert(_buffer == empty, "Should be empty"); + assert(msg != empty, "empty message"); + _buffer = msg; + while (_buffer != empty) { + _monitor.notify(); + _monitor.wait(Mutex::_no_safepoint_check_flag); + } +} + +// ======= Surrogate Locker Thread ============= + +void SurrogateLockerThread::loop() { + BasicLock pll_basic_lock; + SLT_msg_type msg; + debug_only(unsigned int owned = 0;) + + while (/* !isTerminated() */ 1) { + { + MutexLocker x(&_monitor); + // Since we are a JavaThread, we can't be here at a safepoint. + assert(!SafepointSynchronize::is_at_safepoint(), + "SLT is a JavaThread"); + // wait for msg buffer to become non-empty + while (_buffer == empty) { + _monitor.notify(); + _monitor.wait(); + } + msg = _buffer; + } + switch(msg) { + case acquirePLL: { + instanceRefKlass::acquire_pending_list_lock(&pll_basic_lock); + debug_only(owned++;) + break; + } + case releaseAndNotifyPLL: { + assert(owned > 0, "Don't have PLL"); + instanceRefKlass::release_and_notify_pending_list_lock(&pll_basic_lock); + debug_only(owned--;) + break; + } + case empty: + default: { + guarantee(false,"Unexpected message in _buffer"); + break; + } + } + { + MutexLocker x(&_monitor); + // Since we are a JavaThread, we can't be here at a safepoint. + assert(!SafepointSynchronize::is_at_safepoint(), + "SLT is a JavaThread"); + _buffer = empty; + _monitor.notify(); + } + } + assert(!_monitor.owned_by_self(), "Should unlock before exit."); +} + + +// ===== STS Access From Outside CGCT ===== + +void ConcurrentGCThread::stsYield(const char* id) { + assert( Thread::current()->is_ConcurrentGC_thread(), + "only a conc GC thread can call this" ); + _sts.yield(id); +} + +bool ConcurrentGCThread::stsShouldYield() { + assert( Thread::current()->is_ConcurrentGC_thread(), + "only a conc GC thread can call this" ); + return _sts.should_yield(); +} + +void ConcurrentGCThread::stsJoin() { + assert( Thread::current()->is_ConcurrentGC_thread(), + "only a conc GC thread can call this" ); + _sts.join(); +} + +void ConcurrentGCThread::stsLeave() { + assert( Thread::current()->is_ConcurrentGC_thread(), + "only a conc GC thread can call this" ); + _sts.leave(); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/shared/concurrentGCThread.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,167 @@ +/* + * Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class VoidClosure; + +// A SuspendibleThreadSet is (obviously) a set of threads that can be +// suspended. A thread can join and later leave the set, and periodically +// yield. If some thread (not in the set) requests, via suspend_all, that +// the threads be suspended, then the requesting thread is blocked until +// all the threads in the set have yielded or left the set. (Threads may +// not enter the set when an attempted suspension is in progress.) The +// suspending thread later calls resume_all, allowing the suspended threads +// to continue. + +class SuspendibleThreadSet { + Monitor* _m; + int _async; + bool _async_stop; + int _async_stopped; + bool _initialized; + double _suspend_all_start; + + void initialize_work(); + + public: + SuspendibleThreadSet() : _initialized(false) {} + + // Add the current thread to the set. May block if a suspension + // is in progress. + void join(); + // Removes the current thread from the set. + void leave(); + // Returns "true" iff a suspension is in progress. + bool should_yield() { return _async_stop; } + // Suspends the current thread if a suspension is in progress (for + // the duration of the suspension.) + void yield(const char* id); + // Return when all threads in the set are suspended. + void suspend_all(); + // Allow suspended threads to resume. + void resume_all(); + // Redundant initializations okay. + void initialize() { + // Double-check dirty read idiom. + if (!_initialized) initialize_work(); + } +}; + + +class ConcurrentGCThread: public NamedThread { + friend class VMStructs; + +protected: + static bool _should_terminate; + static bool _has_terminated; + + enum CGC_flag_type { + CGC_nil = 0x0, + CGC_dont_suspend = 0x1, + CGC_CGC_safepoint = 0x2, + CGC_VM_safepoint = 0x4 + }; + + static int _CGC_flag; + + static bool CGC_flag_is_set(int b) { return (_CGC_flag & b) != 0; } + static int set_CGC_flag(int b) { return _CGC_flag |= b; } + static int reset_CGC_flag(int b) { return _CGC_flag &= ~b; } + + void stopWorldAndDo(VoidClosure* op); + + // All instances share this one set. + static SuspendibleThreadSet _sts; + + // Create and start the thread (setting its priority high.) + void create_and_start(); + + // Do initialization steps in the thread: record stack base and size, + // init thread local storage, set JNI handle block. + void initialize_in_thread(); + + // Wait until Universe::is_fully_initialized(); + void wait_for_universe_init(); + + // Record that the current thread is terminating, and will do no more + // concurrent work. + void terminate(); + +public: + // Constructor + + ConcurrentGCThread(); + ~ConcurrentGCThread() {} // Exists to call NamedThread destructor.
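+ // Illustrative use of the suspendible thread set from a concurrent + // worker (work_remains() and do_some_work() are hypothetical steps, not + // part of this interface): + // + // stsJoin(); + // while (work_remains()) { + // do_some_work(); + // if (stsShouldYield()) stsYield("my concurrent phase"); + // } + // stsLeave(); + // + // A thread requesting a suspension via _sts.suspend_all() blocks until + // every joined thread has yielded or left; _sts.resume_all() then lets + // the suspended threads continue.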
+ + // Tester + bool is_ConcurrentGC_thread() const { return true; } + + static void safepoint_synchronize(); + static void safepoint_desynchronize(); + + // All overridings should probably do _sts::yield, but we allow + // overriding for distinguished debugging messages. Default is to do + // nothing. + virtual void yield() {} + + bool should_yield() { return _sts.should_yield(); } + + // they are prefixed by sts since there are already yield() and + // should_yield() (non-static) methods in this class and it was an + // easy way to differentiate them. + static void stsYield(const char* id); + static bool stsShouldYield(); + static void stsJoin(); + static void stsLeave(); + +}; + +// The SurrogateLockerThread is used by concurrent GC threads for +// manipulating Java monitors, in particular, currently for +// manipulating the pending_list_lock. XXX +class SurrogateLockerThread: public JavaThread { + friend class VMStructs; + public: + enum SLT_msg_type { + empty = 0, // no message + acquirePLL, // acquire pending list lock + releaseAndNotifyPLL // notify and release pending list lock + }; + private: + // the following are shared with the CMSThread + SLT_msg_type _buffer; // communication buffer + Monitor _monitor; // monitor controlling buffer + BasicLock _basicLock; // used for PLL locking + + public: + static SurrogateLockerThread* make(TRAPS); + + SurrogateLockerThread(); + + bool is_hidden_from_external_view() const { return true; } + + void loop(); // main method + + void manipulatePLL(SLT_msg_type msg); + +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/shared/gcOverheadReporter.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/gcOverheadReporter.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,179 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_gcOverheadReporter.cpp.incl" + +class COReportingThread : public ConcurrentGCThread { +private: + GCOverheadReporter* _reporter; + +public: + COReportingThread(GCOverheadReporter* reporter) : _reporter(reporter) { + guarantee( _reporter != NULL, "precondition" ); + create_and_start(); + } + + virtual void run() { + initialize_in_thread(); + wait_for_universe_init(); + + int period_ms = GCOverheadReportingPeriodMS; + + while ( true ) { + os::sleep(Thread::current(), period_ms, false); + + _sts.join(); + double now_sec = os::elapsedTime(); + _reporter->collect_and_record_conc_overhead(now_sec); + _sts.leave(); + } + + terminate(); + } +}; + +GCOverheadReporter* GCOverheadReporter::_reporter = NULL; + +GCOverheadReporter::GCOverheadReporter(size_t group_num, + const char* group_names[], + size_t length) + : _group_num(group_num), _prev_end_sec(0.0) { + guarantee( 0 <= group_num && group_num <= MaxGCOverheadGroupNum, + "precondition" ); + + _base = NEW_C_HEAP_ARRAY(GCOverheadReporterEntry, length); + _top = _base + length; + _curr = _base; + + for (size_t i = 0; i < group_num; ++i) { + guarantee( group_names[i] != NULL, "precondition" ); + _group_names[i] = group_names[i]; + } +} + +void +GCOverheadReporter::add(double start_sec, double end_sec, + double* conc_overhead, + double stw_overhead) { + assert( _curr <= _top, "invariant" ); + + if (_curr == _top) { + guarantee( false, "trace full" ); + return; + } + + _curr->_start_sec = start_sec; + _curr->_end_sec = end_sec; + for (size_t i = 0; i < _group_num; ++i) { + _curr->_conc_overhead[i] = + (conc_overhead != NULL) ? conc_overhead[i] : 0.0; + } + _curr->_stw_overhead = stw_overhead; + + ++_curr; +} + +void +GCOverheadReporter::collect_and_record_conc_overhead(double end_sec) { + double start_sec = _prev_end_sec; + guarantee( end_sec > start_sec, "invariant" ); + + double conc_overhead[MaxGCOverheadGroupNum]; + COTracker::totalConcOverhead(end_sec, _group_num, conc_overhead); + add_conc_overhead(start_sec, end_sec, conc_overhead); + _prev_end_sec = end_sec; +} + +void +GCOverheadReporter::record_stw_start(double start_sec) { + guarantee( start_sec > _prev_end_sec, "invariant" ); + collect_and_record_conc_overhead(start_sec); +} + +void +GCOverheadReporter::record_stw_end(double end_sec) { + double start_sec = _prev_end_sec; + COTracker::updateAllForSTW(start_sec, end_sec); + add_stw_overhead(start_sec, end_sec, 1.0); + + _prev_end_sec = end_sec; +} + +void +GCOverheadReporter::print() const { + tty->print_cr(""); + tty->print_cr("GC Overhead (%d entries)", _curr - _base); + tty->print_cr(""); + GCOverheadReporterEntry* curr = _base; + while (curr < _curr) { + double total = curr->_stw_overhead; + for (size_t i = 0; i < _group_num; ++i) + total += curr->_conc_overhead[i]; + + tty->print("OVERHEAD %12.8lf %12.8lf ", + curr->_start_sec, curr->_end_sec); + + for (size_t i = 0; i < _group_num; ++i) + tty->print("%s %12.8lf ", _group_names[i], curr->_conc_overhead[i]); + + tty->print_cr("STW %12.8lf TOT %12.8lf", curr->_stw_overhead, total); + ++curr; + } + tty->print_cr(""); +} + +// statics + +void +GCOverheadReporter::initGCOverheadReporter(size_t group_num, + const char* group_names[]) { + guarantee( _reporter == NULL, "should only be called once" ); + guarantee( 0 <= group_num && group_num <= MaxGCOverheadGroupNum, + "precondition" ); + guarantee( group_names != NULL, "pre-condition" ); + + if (GCOverheadReporting) { + _reporter = new GCOverheadReporter(group_num, group_names); 
+ new COReportingThread(_reporter); + } +} + +void +GCOverheadReporter::recordSTWStart(double start_sec) { + if (_reporter != NULL) + _reporter->record_stw_start(start_sec); +} + +void +GCOverheadReporter::recordSTWEnd(double end_sec) { + if (_reporter != NULL) + _reporter->record_stw_end(end_sec); +} + +void +GCOverheadReporter::printGCOverhead() { + if (_reporter != NULL) + _reporter->print(); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/shared/gcOverheadReporter.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/gcOverheadReporter.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,141 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Keeps track of the GC overhead (both concurrent and STW). It stores +// it in a large array and then prints it to tty at the end of the +// execution. + +// See coTracker.hpp for the explanation on what groups are. + +// Let's set a maximum number of concurrent overhead groups, to +// statically allocate any arrays we need and not to have to +// malloc/free them. This is just a bit more convenient. +enum { + MaxGCOverheadGroupNum = 4 +}; + +typedef struct { + double _start_sec; + double _end_sec; + + double _conc_overhead[MaxGCOverheadGroupNum]; + double _stw_overhead; +} GCOverheadReporterEntry; + +class GCOverheadReporter { + friend class COReportingThread; + +private: + enum PrivateConstants { + DefaultReporterLength = 128 * 1024 + }; + + // Reference to the single instance of this class. + static GCOverheadReporter* _reporter; + + // These three fields point to the array that contains the GC + // overhead entries (_base is the base of the array, _top is the + // address past the last entry of the array, _curr is the next + // entry to be used). + GCOverheadReporterEntry* _base; + GCOverheadReporterEntry* _top; + GCOverheadReporterEntry* _curr; + + // The number of concurrent overhead groups. + size_t _group_num; + + // The wall-clock time of the end of the last recorded period of GC + // overhead. + double _prev_end_sec; + + // Names for the concurrent overhead groups. + const char* _group_names[MaxGCOverheadGroupNum]; + + // Add a new entry to the large array. conc_overhead being NULL is + // equivalent to an array full of 0.0s. conc_overhead should have a + // length of at least _group_num. + void add(double start_sec, double end_sec, + double* conc_overhead, + double stw_overhead); + + // Add an entry that represents concurrent GC overhead.
+ // conc_overhead must be at least of length _group_num. + // conc_overhead being NULL is equivalent to an array full of 0.0s. + void add_conc_overhead(double start_sec, double end_sec, + double* conc_overhead) { + add(start_sec, end_sec, conc_overhead, 0.0); + } + + // Add an entry that represents STW GC overhead. + void add_stw_overhead(double start_sec, double end_sec, + double stw_overhead) { + add(start_sec, end_sec, NULL, stw_overhead); + } + + // It records the start of an STW pause (i.e. it records the + // concurrent overhead up to that point). + void record_stw_start(double start_sec); + + // It records the end of an STW pause (i.e. it records the overhead + // associated with the pause and adjusts all the trackers to reflect + // the pause). + void record_stw_end(double end_sec); + + // It queries all the trackers of their concurrent overhead and + // records it. + void collect_and_record_conc_overhead(double end_sec); + + // It prints the contents of the GC overhead array. + void print() const; + + + // Constructor. The same preconditions for group_num and group_names + // from initGCOverheadReporter apply here too. + GCOverheadReporter(size_t group_num, + const char* group_names[], + size_t length = DefaultReporterLength); + +public: + + // statics + + // It initialises the GCOverheadReporter and launches the concurrent + // overhead reporting thread. Both actions happen only if the + // GCOverheadReporting parameter is set. The length of the + // group_names array should be >= group_num and group_num should be + // <= MaxGCOverheadGroupNum. Entries group_names[0..group_num-1] + // should not be NULL. + static void initGCOverheadReporter(size_t group_num, + const char* group_names[]); + + // The following three are provided for convenience and they are + // wrappers around record_stw_start(start_sec), record_stw_end(end_sec), + // and print(). Each of these checks whether GC overhead reporting + // is on (i.e. _reporter != NULL) and, if it is, calls the + // corresponding method. This saves us from repeating this pattern at + // the places where they need to be called. + static void recordSTWStart(double start_sec); + static void recordSTWEnd(double end_sec); + static void printGCOverhead(); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/shared/vmGCOperations.cpp --- a/src/share/vm/gc_implementation/shared/vmGCOperations.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/shared/vmGCOperations.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -74,6 +74,7 @@ // If the GC count has changed someone beat us to the collection // Get the Heap_lock after the pending_list_lock. Heap_lock->lock(); + // Check invocations if (skip_operation()) { // skip collection @@ -82,6 +83,8 @@ _prologue_succeeded = false; } else { _prologue_succeeded = true; + SharedHeap* sh = SharedHeap::heap(); + if (sh != NULL) sh->_thread_holds_heap_lock_for_gc = true; } return _prologue_succeeded; } @@ -90,6 +93,8 @@ void VM_GC_Operation::doit_epilogue() { assert(Thread::current()->is_Java_thread(), "just checking"); // Release the Heap_lock first.
+ SharedHeap* sh = SharedHeap::heap(); + if (sh != NULL) sh->_thread_holds_heap_lock_for_gc = false; Heap_lock->unlock(); release_and_notify_pending_list_lock(); } @@ -148,12 +153,27 @@ void VM_GenCollectForPermanentAllocation::doit() { JvmtiGCForAllocationMarker jgcm; notify_gc_begin(true); - GenCollectedHeap* gch = GenCollectedHeap::heap(); - GCCauseSetter gccs(gch, _gc_cause); - gch->do_full_collection(gch->must_clear_all_soft_refs(), - gch->n_gens() - 1); - _res = gch->perm_gen()->allocate(_size, false); - assert(gch->is_in_reserved_or_null(_res), "result not in heap"); + SharedHeap* heap = (SharedHeap*)Universe::heap(); + GCCauseSetter gccs(heap, _gc_cause); + switch (heap->kind()) { + case (CollectedHeap::GenCollectedHeap): { + GenCollectedHeap* gch = (GenCollectedHeap*)heap; + gch->do_full_collection(gch->must_clear_all_soft_refs(), + gch->n_gens() - 1); + break; + } +#ifndef SERIALGC + case (CollectedHeap::G1CollectedHeap): { + G1CollectedHeap* g1h = (G1CollectedHeap*)heap; + g1h->do_full_collection(_gc_cause == GCCause::_last_ditch_collection); + break; + } +#endif // SERIALGC + default: + ShouldNotReachHere(); + } + _res = heap->perm_gen()->allocate(_size, false); + assert(heap->is_in_reserved_or_null(_res), "result not in heap"); if (_res == NULL && GC_locker::is_active_and_needs_gc()) { set_gc_locked(); } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_interface/collectedHeap.cpp --- a/src/share/vm/gc_interface/collectedHeap.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -138,13 +138,6 @@ return new_obj; } -bool CollectedHeap::can_elide_permanent_oop_store_barriers() const { - // %%% This needs refactoring. (It was gating logic from the server compiler.) - guarantee(kind() < CollectedHeap::G1CollectedHeap, ""); - return !UseConcMarkSweepGC; -} - - HeapWord* CollectedHeap::allocate_new_tlab(size_t size) { guarantee(false, "thread-local allocation buffers not supported"); return NULL; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_interface/collectedHeap.hpp --- a/src/share/vm/gc_interface/collectedHeap.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -364,10 +364,8 @@ // Can a compiler initialize a new object without store barriers? // This permission only extends from the creation of a new object // via a TLAB up to the first subsequent safepoint. - virtual bool can_elide_tlab_store_barriers() const { - guarantee(kind() < CollectedHeap::G1CollectedHeap, "else change or refactor this"); - return true; - } + virtual bool can_elide_tlab_store_barriers() const = 0; + // If a compiler is eliding store barriers for TLAB-allocated objects, // there is probably a corresponding slow path which can produce // an object allocated anywhere. The compiler's runtime support @@ -379,12 +377,10 @@ // Can a compiler elide a store barrier when it writes // a permanent oop into the heap? Applies when the compiler // is storing x to the heap, where x->is_perm() is true. - virtual bool can_elide_permanent_oop_store_barriers() const; + virtual bool can_elide_permanent_oop_store_barriers() const = 0; // Does this heap support heap inspection (+PrintClassHistogram?) - virtual bool supports_heap_inspection() const { - return false; // Until RFE 5023697 is implemented - } + virtual bool supports_heap_inspection() const = 0; // Perform a collection of the heap; intended for use in implementing // "System.gc". 
This probably implies as full a collection as the diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_interface/collectedHeap.inline.hpp --- a/src/share/vm/gc_interface/collectedHeap.inline.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -121,7 +121,7 @@ return result; } } - bool gc_overhead_limit_was_exceeded; + bool gc_overhead_limit_was_exceeded = false; result = Universe::heap()->mem_allocate(size, is_noref, false, diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_interface/gcCause.hpp --- a/src/share/vm/gc_interface/gcCause.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_interface/gcCause.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -60,6 +60,8 @@ _old_generation_too_full_to_scavenge, _adaptive_size_policy, + _g1_inc_collection_pause, _g1_pop_region_collection_pause, + _last_ditch_collection, _last_gc_cause }; @@ -68,12 +70,14 @@ return (cause == GCCause::_java_lang_system_gc || cause == GCCause::_jvmti_force_gc); } + inline static bool is_serviceability_requested_gc(GCCause::Cause cause) { return (cause == GCCause::_jvmti_force_gc || cause == GCCause::_heap_inspection || cause == GCCause::_heap_dump); } + // Return a string describing the GCCause. static const char* to_string(GCCause::Cause cause); // Return true if the GCCause is for a full collection. diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/includeDB_compiler1 --- a/src/share/vm/includeDB_compiler1 Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/includeDB_compiler1 Thu Jun 05 15:57:56 2008 -0700 @@ -36,6 +36,9 @@ c1_CFGPrinter.hpp c1_Compilation.hpp c1_CFGPrinter.hpp c1_Instruction.hpp +cardTableModRefBS.cpp c1_LIR.hpp +cardTableModRefBS.cpp c1_LIRGenerator.hpp + c1_Canonicalizer.cpp c1_Canonicalizer.hpp c1_Canonicalizer.cpp c1_InstructionPrinter.hpp c1_Canonicalizer.cpp ciArray.hpp @@ -55,6 +58,7 @@ c1_CodeStubs_.cpp c1_LIRAssembler.hpp c1_CodeStubs_.cpp c1_MacroAssembler.hpp c1_CodeStubs_.cpp c1_Runtime1.hpp +c1_CodeStubs_.cpp g1SATBCardTableModRefBS.hpp c1_CodeStubs_.cpp nativeInst_.hpp c1_CodeStubs_.cpp sharedRuntime.hpp c1_CodeStubs_.cpp vmreg_.inline.hpp @@ -141,6 +145,7 @@ c1_globals_.hpp globalDefinitions.hpp c1_globals_.hpp macros.hpp +c1_GraphBuilder.cpp bitMap.inline.hpp c1_GraphBuilder.cpp bytecode.hpp c1_GraphBuilder.cpp c1_CFGPrinter.hpp c1_GraphBuilder.cpp c1_Canonicalizer.hpp @@ -158,6 +163,7 @@ c1_GraphBuilder.hpp ciMethodData.hpp c1_GraphBuilder.hpp ciStreams.hpp +c1_IR.cpp bitMap.inline.hpp c1_IR.cpp c1_Compilation.hpp c1_IR.cpp c1_FrameMap.hpp c1_IR.cpp c1_GraphBuilder.hpp @@ -232,33 +238,36 @@ c1_LIRAssembler_.hpp generate_platform_dependent_include -c1_LIRGenerator.cpp c1_Compilation.hpp -c1_LIRGenerator.cpp c1_FrameMap.hpp -c1_LIRGenerator.cpp c1_Instruction.hpp -c1_LIRGenerator.cpp c1_LIRAssembler.hpp -c1_LIRGenerator.cpp c1_LIRGenerator.hpp -c1_LIRGenerator.cpp c1_ValueStack.hpp -c1_LIRGenerator.cpp ciArrayKlass.hpp -c1_LIRGenerator.cpp ciInstance.hpp -c1_LIRGenerator.cpp sharedRuntime.hpp +c1_LIRGenerator.cpp bitMap.inline.hpp +c1_LIRGenerator.cpp c1_Compilation.hpp +c1_LIRGenerator.cpp c1_FrameMap.hpp +c1_LIRGenerator.cpp c1_Instruction.hpp +c1_LIRGenerator.cpp c1_LIRAssembler.hpp +c1_LIRGenerator.cpp c1_LIRGenerator.hpp +c1_LIRGenerator.cpp c1_ValueStack.hpp +c1_LIRGenerator.cpp ciArrayKlass.hpp +c1_LIRGenerator.cpp ciInstance.hpp +c1_LIRGenerator.cpp heapRegion.hpp +c1_LIRGenerator.cpp sharedRuntime.hpp -c1_LIRGenerator.hpp c1_Instruction.hpp -c1_LIRGenerator.hpp c1_LIR.hpp -c1_LIRGenerator.hpp 
ciMethodData.hpp -c1_LIRGenerator.hpp sizes.hpp +c1_LIRGenerator.hpp c1_Instruction.hpp +c1_LIRGenerator.hpp c1_LIR.hpp +c1_LIRGenerator.hpp ciMethodData.hpp +c1_LIRGenerator.hpp sizes.hpp -c1_LIRGenerator_.cpp c1_Compilation.hpp -c1_LIRGenerator_.cpp c1_FrameMap.hpp -c1_LIRGenerator_.cpp c1_Instruction.hpp -c1_LIRGenerator_.cpp c1_LIRAssembler.hpp -c1_LIRGenerator_.cpp c1_LIRGenerator.hpp -c1_LIRGenerator_.cpp c1_Runtime1.hpp -c1_LIRGenerator_.cpp c1_ValueStack.hpp -c1_LIRGenerator_.cpp ciArray.hpp -c1_LIRGenerator_.cpp ciObjArrayKlass.hpp -c1_LIRGenerator_.cpp ciTypeArrayKlass.hpp -c1_LIRGenerator_.cpp sharedRuntime.hpp +c1_LIRGenerator_.cpp c1_Compilation.hpp +c1_LIRGenerator_.cpp c1_FrameMap.hpp +c1_LIRGenerator_.cpp c1_Instruction.hpp +c1_LIRGenerator_.cpp c1_LIRAssembler.hpp +c1_LIRGenerator_.cpp c1_LIRGenerator.hpp +c1_LIRGenerator_.cpp c1_Runtime1.hpp +c1_LIRGenerator_.cpp c1_ValueStack.hpp +c1_LIRGenerator_.cpp ciArray.hpp +c1_LIRGenerator_.cpp ciObjArrayKlass.hpp +c1_LIRGenerator_.cpp ciTypeArrayKlass.hpp +c1_LIRGenerator_.cpp sharedRuntime.hpp +c1_LinearScan.cpp bitMap.inline.hpp c1_LinearScan.cpp c1_CFGPrinter.hpp c1_LinearScan.cpp c1_Compilation.hpp c1_LinearScan.cpp c1_FrameMap.hpp @@ -275,6 +284,7 @@ c1_LinearScan.hpp c1_LIR.hpp c1_LinearScan.hpp c1_LIRGenerator.hpp +c1_LinearScan_.cpp bitMap.inline.hpp c1_LinearScan_.cpp c1_Instruction.hpp c1_LinearScan_.cpp c1_LinearScan.hpp @@ -297,6 +307,7 @@ c1_MacroAssembler_.hpp generate_platform_dependent_include +c1_Optimizer.cpp bitMap.inline.hpp c1_Optimizer.cpp c1_Canonicalizer.hpp c1_Optimizer.cpp c1_Optimizer.hpp c1_Optimizer.cpp c1_ValueMap.hpp @@ -362,6 +373,7 @@ c1_Runtime1_.cpp vframeArray.hpp c1_Runtime1_.cpp vmreg_.inline.hpp +c1_ValueMap.cpp bitMap.inline.hpp c1_ValueMap.cpp c1_Canonicalizer.hpp c1_ValueMap.cpp c1_IR.hpp c1_ValueMap.cpp c1_ValueMap.hpp @@ -432,4 +444,3 @@ top.hpp c1_globals.hpp vmStructs.hpp c1_Runtime1.hpp - diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/includeDB_compiler2 --- a/src/share/vm/includeDB_compiler2 Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/includeDB_compiler2 Thu Jun 05 15:57:56 2008 -0700 @@ -461,10 +461,13 @@ graphKit.cpp addnode.hpp graphKit.cpp barrierSet.hpp graphKit.cpp cardTableModRefBS.hpp +graphKit.cpp g1SATBCardTableModRefBS.hpp graphKit.cpp collectedHeap.hpp graphKit.cpp compileLog.hpp graphKit.cpp deoptimization.hpp graphKit.cpp graphKit.hpp +graphKit.cpp heapRegion.hpp +graphKit.cpp idealKit.hpp graphKit.cpp locknode.hpp graphKit.cpp machnode.hpp graphKit.cpp parse.hpp @@ -484,6 +487,7 @@ idealKit.cpp callnode.hpp idealKit.cpp cfgnode.hpp idealKit.cpp idealKit.hpp +idealKit.cpp runtime.hpp idealKit.hpp connode.hpp idealKit.hpp mulnode.hpp @@ -915,9 +919,11 @@ runtime.cpp connode.hpp runtime.cpp copy.hpp runtime.cpp fprofiler.hpp +runtime.cpp g1SATBCardTableModRefBS.hpp runtime.cpp gcLocker.inline.hpp runtime.cpp graphKit.hpp runtime.cpp handles.inline.hpp +runtime.cpp heapRegion.hpp runtime.cpp icBuffer.hpp runtime.cpp interfaceSupport.hpp runtime.cpp interpreter.hpp diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/includeDB_core --- a/src/share/vm/includeDB_core Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/includeDB_core Thu Jun 05 15:57:56 2008 -0700 @@ -287,6 +287,10 @@ attachListener.hpp debug.hpp attachListener.hpp ostream.hpp +barrierSet.cpp barrierSet.hpp +barrierSet.cpp collectedHeap.hpp +barrierSet.cpp universe.hpp + barrierSet.hpp memRegion.hpp barrierSet.hpp oopsHierarchy.hpp @@ -294,7 +298,7 @@ barrierSet.inline.hpp cardTableModRefBS.hpp 
bcEscapeAnalyzer.cpp bcEscapeAnalyzer.hpp -bcEscapeAnalyzer.cpp bitMap.hpp +bcEscapeAnalyzer.cpp bitMap.inline.hpp bcEscapeAnalyzer.cpp bytecode.hpp bcEscapeAnalyzer.cpp ciConstant.hpp bcEscapeAnalyzer.cpp ciField.hpp @@ -319,13 +323,12 @@ biasedLocking.hpp growableArray.hpp biasedLocking.hpp handles.hpp -bitMap.cpp bitMap.hpp +bitMap.cpp allocation.inline.hpp bitMap.cpp bitMap.inline.hpp bitMap.cpp copy.hpp bitMap.cpp os_.inline.hpp bitMap.hpp allocation.hpp -bitMap.hpp ostream.hpp bitMap.hpp top.hpp bitMap.inline.hpp atomic.hpp @@ -644,6 +647,7 @@ ciMethod.cpp abstractCompiler.hpp ciMethod.cpp allocation.inline.hpp ciMethod.cpp bcEscapeAnalyzer.hpp +ciMethod.cpp bitMap.inline.hpp ciMethod.cpp ciCallProfile.hpp ciMethod.cpp ciExceptionHandler.hpp ciMethod.cpp ciInstanceKlass.hpp @@ -1757,7 +1761,7 @@ genRemSet.hpp oop.hpp -generateOopMap.cpp bitMap.hpp +generateOopMap.cpp bitMap.inline.hpp generateOopMap.cpp bytecodeStream.hpp generateOopMap.cpp generateOopMap.hpp generateOopMap.cpp handles.inline.hpp @@ -1805,6 +1809,8 @@ generation.inline.hpp generation.hpp generation.inline.hpp space.hpp +genOopClosures.hpp oop.hpp + generationSpec.cpp compactPermGen.hpp generationSpec.cpp defNewGeneration.hpp generationSpec.cpp filemap.hpp @@ -2216,6 +2222,11 @@ invocationCounter.hpp exceptions.hpp invocationCounter.hpp handles.hpp +intHisto.cpp intHisto.hpp + +intHisto.hpp allocation.hpp +intHisto.hpp growableArray.hpp + iterator.cpp iterator.hpp iterator.cpp oop.inline.hpp @@ -2815,6 +2826,7 @@ methodKlass.hpp methodOop.hpp methodLiveness.cpp allocation.inline.hpp +methodLiveness.cpp bitMap.inline.hpp methodLiveness.cpp bytecode.hpp methodLiveness.cpp bytecodes.hpp methodLiveness.cpp ciMethod.hpp @@ -2961,6 +2973,11 @@ nmethod.hpp codeBlob.hpp nmethod.hpp pcDesc.hpp +numberSeq.cpp debug.hpp +numberSeq.cpp numberSeq.hpp +numberSeq.cpp globalDefinitions.hpp +numberSeq.cpp allocation.inline.hpp + objArrayKlass.cpp collectedHeap.inline.hpp objArrayKlass.cpp copy.hpp objArrayKlass.cpp genOopClosures.inline.hpp @@ -3403,8 +3420,6 @@ referencePolicy.cpp referencePolicy.hpp referencePolicy.cpp universe.hpp -referencePolicy.hpp oop.hpp - referenceProcessor.cpp collectedHeap.hpp referenceProcessor.cpp collectedHeap.inline.hpp referenceProcessor.cpp java.hpp @@ -3746,6 +3761,8 @@ specialized_oop_closures.cpp ostream.hpp specialized_oop_closures.cpp specialized_oop_closures.hpp +specialized_oop_closures.hpp atomic.hpp + stackMapFrame.cpp globalDefinitions.hpp stackMapFrame.cpp handles.inline.hpp stackMapFrame.cpp oop.inline.hpp @@ -3988,7 +4005,6 @@ taskqueue.hpp allocation.hpp taskqueue.hpp allocation.inline.hpp -taskqueue.hpp debug.hpp taskqueue.hpp mutex.hpp taskqueue.hpp orderAccess_.inline.hpp @@ -4026,6 +4042,7 @@ templateInterpreterGenerator_.hpp generate_platform_dependent_include +templateTable.cpp collectedHeap.hpp templateTable.cpp templateTable.hpp templateTable.cpp timer.hpp @@ -4530,6 +4547,7 @@ vm_operations.cpp compilerOracle.hpp vm_operations.cpp deoptimization.hpp vm_operations.cpp interfaceSupport.hpp +vm_operations.cpp isGCActiveMark.hpp vm_operations.cpp resourceArea.hpp vm_operations.cpp threadService.hpp vm_operations.cpp thread_.inline.hpp diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/includeDB_gc_parallel --- a/src/share/vm/includeDB_gc_parallel Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/includeDB_gc_parallel Thu Jun 05 15:57:56 2008 -0700 @@ -21,6 +21,10 @@ // have any questions. 
// +assembler_.cpp g1SATBCardTableModRefBS.hpp +assembler_.cpp g1CollectedHeap.inline.hpp +assembler_.cpp heapRegion.hpp + collectorPolicy.cpp cmsAdaptiveSizePolicy.hpp collectorPolicy.cpp cmsGCAdaptivePolicyCounters.hpp @@ -37,6 +41,9 @@ heapInspection.cpp parallelScavengeHeap.hpp +instanceKlass.cpp heapRegionSeq.inline.hpp +instanceKlass.cpp g1CollectedHeap.inline.hpp +instanceKlass.cpp g1OopClosures.inline.hpp instanceKlass.cpp oop.pcgc.inline.hpp instanceKlass.cpp psPromotionManager.inline.hpp instanceKlass.cpp psScavenge.inline.hpp @@ -48,6 +55,9 @@ instanceKlassKlass.cpp psScavenge.inline.hpp instanceKlassKlass.cpp parOopClosures.inline.hpp +instanceRefKlass.cpp heapRegionSeq.inline.hpp +instanceRefKlass.cpp g1CollectedHeap.inline.hpp +instanceRefKlass.cpp g1OopClosures.inline.hpp instanceRefKlass.cpp oop.pcgc.inline.hpp instanceRefKlass.cpp psPromotionManager.inline.hpp instanceRefKlass.cpp psScavenge.inline.hpp @@ -70,6 +80,7 @@ memoryService.cpp cmsPermGen.hpp memoryService.cpp concurrentMarkSweepGeneration.hpp +memoryService.cpp g1CollectedHeap.inline.hpp memoryService.cpp parNewGeneration.hpp memoryService.cpp parallelScavengeHeap.hpp memoryService.cpp psMemoryPool.hpp @@ -80,6 +91,9 @@ methodDataKlass.cpp oop.pcgc.inline.hpp methodDataKlass.cpp psScavenge.inline.hpp +objArrayKlass.cpp heapRegionSeq.inline.hpp +objArrayKlass.cpp g1CollectedHeap.inline.hpp +objArrayKlass.cpp g1OopClosures.inline.hpp objArrayKlass.cpp oop.pcgc.inline.hpp objArrayKlass.cpp psPromotionManager.inline.hpp objArrayKlass.cpp psScavenge.inline.hpp @@ -122,6 +136,9 @@ thread.cpp concurrentMarkSweepThread.hpp thread.cpp pcTasks.hpp +thread.hpp dirtyCardQueue.hpp +thread.hpp satbQueue.hpp + universe.cpp parallelScavengeHeap.hpp universe.cpp cmsCollectorPolicy.hpp universe.cpp cmsAdaptiveSizePolicy.hpp diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/includeDB_jvmti --- a/src/share/vm/includeDB_jvmti Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/includeDB_jvmti Thu Jun 05 15:57:56 2008 -0700 @@ -209,6 +209,7 @@ jvmtiManageCapabilities.hpp allocation.hpp jvmtiManageCapabilities.hpp jvmti.h +jvmtiRedefineClasses.cpp bitMap.inline.hpp jvmtiRedefineClasses.cpp codeCache.hpp jvmtiRedefineClasses.cpp deoptimization.hpp jvmtiRedefineClasses.cpp gcLocker.hpp diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/interpreter/templateTable.cpp --- a/src/share/vm/interpreter/templateTable.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/interpreter/templateTable.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -172,6 +172,7 @@ Template* TemplateTable::_desc; InterpreterMacroAssembler* TemplateTable::_masm; +BarrierSet* TemplateTable::_bs; void TemplateTable::def(Bytecodes::Code code, int flags, TosState in, TosState out, void (*gen)(), char filler) { @@ -244,6 +245,8 @@ // Initialize table TraceTime timer("TemplateTable initialization", TraceStartupTime); + _bs = Universe::heap()->barrier_set(); + // For better readability const char _ = ' '; const int ____ = 0; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/interpreter/templateTable.hpp --- a/src/share/vm/interpreter/templateTable.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/interpreter/templateTable.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -82,6 +82,7 @@ static Template* _desc; // the current template to be generated static Bytecodes::Code bytecode() { return _desc->bytecode(); } + static BarrierSet* _bs; // Cache the barrier set. 
public: //%note templates_1 static InterpreterMacroAssembler* _masm; // the assembler used when generating templates diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/allocation.hpp --- a/src/share/vm/memory/allocation.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/allocation.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -338,6 +338,12 @@ DEBUG_ONLY(((ResourceObj *)res)->_allocation = RESOURCE_AREA;) return res; } + void* operator new(size_t size, void* where, allocation_type type) { + void* res = where; + // Set allocation type in the resource object + DEBUG_ONLY(((ResourceObj *)res)->_allocation = type;) + return res; + } void operator delete(void* p); }; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/barrierSet.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/memory/barrierSet.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,36 @@ +/* + * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +# include "incls/_precompiled.incl" +# include "incls/_barrierSet.cpp.incl" + +// count is in HeapWord's +void BarrierSet::static_write_ref_array_pre(HeapWord* start, size_t count) { + Universe::heap()->barrier_set()->write_ref_array_pre(MemRegion(start, start + count)); +} + +// count is in HeapWord's +void BarrierSet::static_write_ref_array_post(HeapWord* start, size_t count) { + Universe::heap()->barrier_set()->write_ref_array_work(MemRegion(start, start + count)); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/barrierSet.hpp --- a/src/share/vm/memory/barrierSet.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/barrierSet.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -32,6 +32,8 @@ ModRef, CardTableModRef, CardTableExtension, + G1SATBCT, + G1SATBCTLogging, Other, Uninit }; @@ -42,14 +44,16 @@ public: + BarrierSet() { _kind = Uninit; } // To get around prohibition on RTTI. - virtual BarrierSet::Name kind() { return _kind; } + BarrierSet::Name kind() { return _kind; } virtual bool is_a(BarrierSet::Name bsn) = 0; // These operations indicate what kind of barriers the BarrierSet has. 
virtual bool has_read_ref_barrier() = 0; virtual bool has_read_prim_barrier() = 0; virtual bool has_write_ref_barrier() = 0; + virtual bool has_write_ref_pre_barrier() = 0; virtual bool has_write_prim_barrier() = 0; // These functions indicate whether a particular access of the given @@ -57,7 +61,8 @@ virtual bool read_ref_needs_barrier(void* field) = 0; virtual bool read_prim_needs_barrier(HeapWord* field, size_t bytes) = 0; virtual bool write_ref_needs_barrier(void* field, oop new_val) = 0; - virtual bool write_prim_needs_barrier(HeapWord* field, size_t bytes, juint val1, juint val2) = 0; + virtual bool write_prim_needs_barrier(HeapWord* field, size_t bytes, + juint val1, juint val2) = 0; // The first four operations provide a direct implementation of the // barrier set. An interpreter loop, for example, could call these @@ -75,6 +80,13 @@ // (For efficiency reasons, this operation is specialized for certain // barrier types. Semantically, it should be thought of as a call to the // virtual "_work" function below, which must implement the barrier.) + // First the pre-write versions... + inline void write_ref_field_pre(void* field, oop new_val); +protected: + virtual void write_ref_field_pre_work(void* field, oop new_val) {}; +public: + + // ...then the post-write version. inline void write_ref_field(void* field, oop new_val); protected: virtual void write_ref_field_work(void* field, oop new_val) = 0; @@ -92,6 +104,7 @@ // the particular barrier. virtual bool has_read_ref_array_opt() = 0; virtual bool has_read_prim_array_opt() = 0; + virtual bool has_write_ref_array_pre_opt() { return true; } virtual bool has_write_ref_array_opt() = 0; virtual bool has_write_prim_array_opt() = 0; @@ -104,7 +117,13 @@ virtual void read_ref_array(MemRegion mr) = 0; virtual void read_prim_array(MemRegion mr) = 0; + virtual void write_ref_array_pre(MemRegion mr) {} inline void write_ref_array(MemRegion mr); + + // Static versions, suitable for calling from generated code. + static void static_write_ref_array_pre(HeapWord* start, size_t count); + static void static_write_ref_array_post(HeapWord* start, size_t count); + protected: virtual void write_ref_array_work(MemRegion mr) = 0; public: @@ -120,33 +139,6 @@ virtual void write_region_work(MemRegion mr) = 0; public: - // The remaining sets of operations are called by compilers or other code - // generators to insert barriers into generated code. There may be - // several such code generators; the signatures of these - // barrier-generating functions may differ from generator to generator. - // There will be a set of four function signatures for each code - // generator, which accomplish the generation of barriers of the four - // kinds listed above. - -#ifdef TBD - // Generates code to invoke the barrier, if any, necessary when reading - // the ref field at "offset" in "obj". - virtual void gen_read_ref_field() = 0; - - // Generates code to invoke the barrier, if any, necessary when reading - // the primitive field of "bytes" bytes at offset" in "obj". - virtual void gen_read_prim_field() = 0; - - // Generates code to invoke the barrier, if any, necessary when writing - // "new_val" into the ref field at "offset" in "obj". - virtual void gen_write_ref_field() = 0; - - // Generates code to invoke the barrier, if any, necessary when writing - // the "bytes"-byte value "new_val" into the primitive field at "offset" - // in "obj". 
- virtual void gen_write_prim_field() = 0; -#endif - // Some barrier sets create tables whose elements correspond to parts of // the heap; the CardTableModRefBS is an example. Such barrier sets will // normally reserve space for such tables, and commit parts of the table diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/barrierSet.inline.hpp --- a/src/share/vm/memory/barrierSet.inline.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/barrierSet.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -26,6 +26,14 @@ // performance-critical calls when when the barrier is the most common // card-table kind. +void BarrierSet::write_ref_field_pre(void* field, oop new_val) { + if (kind() == CardTableModRef) { + ((CardTableModRefBS*)this)->inline_write_ref_field_pre(field, new_val); + } else { + write_ref_field_pre_work(field, new_val); + } +} + void BarrierSet::write_ref_field(void* field, oop new_val) { if (kind() == CardTableModRef) { ((CardTableModRefBS*)this)->inline_write_ref_field(field, new_val); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/blockOffsetTable.cpp --- a/src/share/vm/memory/blockOffsetTable.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/blockOffsetTable.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -184,7 +184,7 @@ "Offset card has an unexpected value"); size_t start_card_for_region = start_card; u_char offset = max_jubyte; - for (int i = 0; i <= N_powers-1; i++) { + for (int i = 0; i < N_powers; i++) { // -1 so that the the card with the actual offset is counted. Another -1 // so that the reach ends in this region and not at the start // of the next. diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/blockOffsetTable.hpp --- a/src/share/vm/memory/blockOffsetTable.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/blockOffsetTable.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -208,6 +208,7 @@ ////////////////////////////////////////////////////////////////////////// class BlockOffsetArray: public BlockOffsetTable { friend class VMStructs; + friend class G1BlockOffsetArray; // temp. until we restructure and cleanup protected: // The following enums are used by do_block_internal() below enum Action { diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/cardTableModRefBS.cpp --- a/src/share/vm/memory/cardTableModRefBS.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/cardTableModRefBS.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -299,6 +299,17 @@ } +bool CardTableModRefBS::claim_card(size_t card_index) { + jbyte val = _byte_map[card_index]; + if (val != claimed_card_val()) { + jbyte res = Atomic::cmpxchg((jbyte) claimed_card_val(), &_byte_map[card_index], val); + if (res == val) + return true; + else return false; + } + return false; +} + void CardTableModRefBS::non_clean_card_iterate(Space* sp, MemRegion mr, DirtyCardToOopClosure* dcto_cl, @@ -398,7 +409,7 @@ } } -void CardTableModRefBS::invalidate(MemRegion mr) { +void CardTableModRefBS::invalidate(MemRegion mr, bool whole_heap) { for (int i = 0; i < _cur_covered_regions; i++) { MemRegion mri = mr.intersection(_covered[i]); if (!mri.is_empty()) dirty_MemRegion(mri); @@ -426,11 +437,15 @@ } } +void CardTableModRefBS::dirty(MemRegion mr) { + jbyte* first = byte_for(mr.start()); + jbyte* last = byte_after(mr.last()); + memset(first, dirty_card, last-first); +} + // NOTES: // (1) Unlike mod_oop_in_space_iterate() above, dirty_card_iterate() // iterates over dirty cards ranges in increasing address order. 
-// (2) Unlike, e.g., dirty_card_range_after_preclean() below, -// this method does not make the dirty cards prelceaned. void CardTableModRefBS::dirty_card_iterate(MemRegion mr, MemRegionClosure* cl) { for (int i = 0; i < _cur_covered_regions; i++) { @@ -456,7 +471,9 @@ } } -MemRegion CardTableModRefBS::dirty_card_range_after_preclean(MemRegion mr) { +MemRegion CardTableModRefBS::dirty_card_range_after_reset(MemRegion mr, + bool reset, + int reset_val) { for (int i = 0; i < _cur_covered_regions; i++) { MemRegion mri = mr.intersection(_covered[i]); if (!mri.is_empty()) { @@ -473,8 +490,10 @@ dirty_cards++, next_entry++); MemRegion cur_cards(addr_for(cur_entry), dirty_cards*card_size_in_words); - for (size_t i = 0; i < dirty_cards; i++) { - cur_entry[i] = precleaned_card; + if (reset) { + for (size_t i = 0; i < dirty_cards; i++) { + cur_entry[i] = reset_val; + } } return cur_cards; } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/cardTableModRefBS.hpp --- a/src/share/vm/memory/cardTableModRefBS.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/cardTableModRefBS.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -54,6 +54,7 @@ clean_card = -1, dirty_card = 0, precleaned_card = 1, + claimed_card = 3, last_card = 4, CT_MR_BS_last_reserved = 10 }; @@ -150,17 +151,6 @@ return byte_for(p) + 1; } - // Mapping from card marking array entry to address of first word - HeapWord* addr_for(const jbyte* p) const { - assert(p >= _byte_map && p < _byte_map + _byte_map_size, - "out of bounds access to card marking array"); - size_t delta = pointer_delta(p, byte_map_base, sizeof(jbyte)); - HeapWord* result = (HeapWord*) (delta << card_shift); - assert(_whole_heap.contains(result), - "out of bounds accessor from card marking array"); - return result; - } - // Iterate over the portion of the card-table which covers the given // region mr in the given space and apply cl to any dirty sub-regions // of mr. cl and dcto_cl must either be the same closure or cl must @@ -263,16 +253,22 @@ card_size_in_words = card_size / sizeof(HeapWord) }; + static int clean_card_val() { return clean_card; } + static int dirty_card_val() { return dirty_card; } + static int claimed_card_val() { return claimed_card; } + static int precleaned_card_val() { return precleaned_card; } + // For RTTI simulation. - BarrierSet::Name kind() { return BarrierSet::CardTableModRef; } bool is_a(BarrierSet::Name bsn) { - return bsn == BarrierSet::CardTableModRef || bsn == BarrierSet::ModRef; + return bsn == BarrierSet::CardTableModRef || ModRefBarrierSet::is_a(bsn); } CardTableModRefBS(MemRegion whole_heap, int max_covered_regions); // *** Barrier set functions. + bool has_write_ref_pre_barrier() { return false; } + inline bool write_ref_needs_barrier(void* field, oop new_val) { // Note that this assumes the perm gen is the highest generation // in the address space @@ -315,11 +311,33 @@ // *** Card-table-barrier-specific things. + inline void inline_write_ref_field_pre(void* field, oop newVal) {} + inline void inline_write_ref_field(void* field, oop newVal) { jbyte* byte = byte_for(field); *byte = dirty_card; } + // These are used by G1, when it uses the card table as a temporary data + // structure for card claiming. 
+ bool is_card_dirty(size_t card_index) { + return _byte_map[card_index] == dirty_card_val(); + } + + void mark_card_dirty(size_t card_index) { + _byte_map[card_index] = dirty_card_val(); + } + + bool is_card_claimed(size_t card_index) { + return _byte_map[card_index] == claimed_card_val(); + } + + bool claim_card(size_t card_index); + + bool is_card_clean(size_t card_index) { + return _byte_map[card_index] == clean_card_val(); + } + // Card marking array base (adjusted for heap low boundary) // This would be the 0th element of _byte_map, if the heap started at 0x0. // But since the heap starts at some higher address, this points to somewhere @@ -344,8 +362,9 @@ } // ModRefBS functions. - void invalidate(MemRegion mr); + virtual void invalidate(MemRegion mr, bool whole_heap = false); void clear(MemRegion mr); + void dirty(MemRegion mr); void mod_oop_in_space_iterate(Space* sp, OopClosure* cl, bool clear = false, bool before_save_marks = false); @@ -375,18 +394,39 @@ static uintx ct_max_alignment_constraint(); - // Apply closure cl to the dirty cards lying completely - // within MemRegion mr, setting the cards to precleaned. - void dirty_card_iterate(MemRegion mr, MemRegionClosure* cl); + // Apply closure "cl" to the dirty cards containing some part of + // MemRegion "mr". + void dirty_card_iterate(MemRegion mr, MemRegionClosure* cl); // Return the MemRegion corresponding to the first maximal run - // of dirty cards lying completely within MemRegion mr, after - // marking those cards precleaned. - MemRegion dirty_card_range_after_preclean(MemRegion mr); + // of dirty cards lying completely within MemRegion mr. + // If reset is "true", then sets those card table entries to the given + // value. + MemRegion dirty_card_range_after_reset(MemRegion mr, bool reset, + int reset_val); // Set all the dirty cards in the given region to precleaned state. void preclean_dirty_cards(MemRegion mr); + // Provide read-only access to the card table array. + const jbyte* byte_for_const(const void* p) const { + return byte_for(p); + } + const jbyte* byte_after_const(const void* p) const { + return byte_after(p); + } + + // Mapping from card marking array entry to address of first word + HeapWord* addr_for(const jbyte* p) const { + assert(p >= _byte_map && p < _byte_map + _byte_map_size, + "out of bounds access to card marking array"); + size_t delta = pointer_delta(p, byte_map_base, sizeof(jbyte)); + HeapWord* result = (HeapWord*) (delta << card_shift); + assert(_whole_heap.contains(result), + "out of bounds accessor from card marking array"); + return result; + } + // Mapping from address to card marking array index. 
int index_for(void* p) { assert(_whole_heap.contains(p), @@ -402,6 +442,7 @@ static size_t par_chunk_heapword_alignment() { return CardsPerStrideChunk * card_size_in_words; } + }; class CardTableRS; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/cardTableRS.cpp --- a/src/share/vm/memory/cardTableRS.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/cardTableRS.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -27,10 +27,25 @@ CardTableRS::CardTableRS(MemRegion whole_heap, int max_covered_regions) : - GenRemSet(&_ct_bs), - _ct_bs(whole_heap, max_covered_regions), - _cur_youngergen_card_val(youngergenP1_card) + GenRemSet(), + _cur_youngergen_card_val(youngergenP1_card), + _regions_to_iterate(max_covered_regions - 1) { +#ifndef SERIALGC + if (UseG1GC) { + if (G1RSBarrierUseQueue) { + _ct_bs = new G1SATBCardTableLoggingModRefBS(whole_heap, + max_covered_regions); + } else { + _ct_bs = new G1SATBCardTableModRefBS(whole_heap, max_covered_regions); + } + } else { + _ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions); + } +#else + _ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions); +#endif + set_bs(_ct_bs); _last_cur_val_in_gen = new jbyte[GenCollectedHeap::max_gens + 1]; if (_last_cur_val_in_gen == NULL) { vm_exit_during_initialization("Could not last_cur_val_in_gen array."); @@ -38,20 +53,19 @@ for (int i = 0; i < GenCollectedHeap::max_gens + 1; i++) { _last_cur_val_in_gen[i] = clean_card_val(); } - _ct_bs.set_CTRS(this); + _ct_bs->set_CTRS(this); } void CardTableRS::resize_covered_region(MemRegion new_region) { - _ct_bs.resize_covered_region(new_region); + _ct_bs->resize_covered_region(new_region); } jbyte CardTableRS::find_unused_youngergenP_card_value() { - GenCollectedHeap* gch = GenCollectedHeap::heap(); for (jbyte v = youngergenP1_card; v < cur_youngergen_and_prev_nonclean_card; v++) { bool seen = false; - for (int g = 0; g < gch->n_gens()+1; g++) { + for (int g = 0; g < _regions_to_iterate; g++) { if (_last_cur_val_in_gen[g] == v) { seen = true; break; @@ -221,11 +235,11 @@ void CardTableRS::younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl) { - DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, _ct_bs.precision(), + DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, _ct_bs->precision(), cl->gen_boundary()); ClearNoncleanCardWrapper clear_cl(dcto_cl, this); - _ct_bs.non_clean_card_iterate(sp, sp->used_region_at_save_marks(), + _ct_bs->non_clean_card_iterate(sp, sp->used_region_at_save_marks(), dcto_cl, &clear_cl, false); } @@ -549,7 +563,7 @@ if (ch->kind() == CollectedHeap::GenCollectedHeap) { GenCollectedHeap::heap()->generation_iterate(&blk, false); - _ct_bs.verify(); + _ct_bs->verify(); // If the old gen collections also collect perm, then we are only // interested in perm-to-young pointers, not perm-to-old pointers. 
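For context on the barrierSet.hpp and cardTableModRefBS changes above: the reference write barrier is now split into a pre-write and a post-write step, so a snapshot-at-the-beginning collector such as G1 can observe the value about to be overwritten, while card-table collectors keep their usual post-store card dirtying. A minimal sketch of the intended calling pattern around a reference store follows; the helper name example_oop_field_store is hypothetical, and only the two BarrierSet entry points are taken from the declarations above.

// Sketch only -- not part of this changeset. Shows how a runtime oop store
// would be expected to drive the new pre/post write barrier pair.
inline void example_oop_field_store(void* field, oop new_val) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  bs->write_ref_field_pre(field, new_val);  // pre-barrier: a SATB collector may
                                            // record the value being overwritten;
                                            // CardTableModRefBS leaves its
                                            // inline_write_ref_field_pre empty.
  *(oop*)field = new_val;                   // the actual reference store
                                            // (ignores compressed-oop encoding
                                            // for brevity)
  bs->write_ref_field(field, new_val);      // post-barrier: e.g. dirty the card
                                            // covering "field"
}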
diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/cardTableRS.hpp --- a/src/share/vm/memory/cardTableRS.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/cardTableRS.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -44,7 +44,7 @@ return CardTableModRefBS::card_is_dirty_wrt_gen_iter(cv); } - CardTableModRefBSForCTRS _ct_bs; + CardTableModRefBSForCTRS* _ct_bs; virtual void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl); @@ -73,6 +73,8 @@ jbyte _cur_youngergen_card_val; + int _regions_to_iterate; + jbyte cur_youngergen_card_val() { return _cur_youngergen_card_val; } @@ -96,7 +98,7 @@ CardTableRS* as_CardTableRS() { return this; } - CardTableModRefBS* ct_bs() { return &_ct_bs; } + CardTableModRefBS* ct_bs() { return _ct_bs; } // Override. void prepare_for_younger_refs_iterate(bool parallel); @@ -107,7 +109,7 @@ void younger_refs_iterate(Generation* g, OopsInGenClosure* blk); void inline_write_ref_field_gc(void* field, oop new_val) { - jbyte* byte = _ct_bs.byte_for(field); + jbyte* byte = _ct_bs->byte_for(field); *byte = youngergen_card; } void write_ref_field_gc_work(void* field, oop new_val) { @@ -122,25 +124,27 @@ void resize_covered_region(MemRegion new_region); bool is_aligned(HeapWord* addr) { - return _ct_bs.is_card_aligned(addr); + return _ct_bs->is_card_aligned(addr); } void verify(); void verify_aligned_region_empty(MemRegion mr); - void clear(MemRegion mr) { _ct_bs.clear(mr); } + void clear(MemRegion mr) { _ct_bs->clear(mr); } void clear_into_younger(Generation* gen, bool clear_perm); - void invalidate(MemRegion mr) { _ct_bs.invalidate(mr); } + void invalidate(MemRegion mr, bool whole_heap = false) { + _ct_bs->invalidate(mr, whole_heap); + } void invalidate_or_clear(Generation* gen, bool younger, bool perm); static uintx ct_max_alignment_constraint() { return CardTableModRefBS::ct_max_alignment_constraint(); } - jbyte* byte_for(void* p) { return _ct_bs.byte_for(p); } - jbyte* byte_after(void* p) { return _ct_bs.byte_after(p); } - HeapWord* addr_for(jbyte* p) { return _ct_bs.addr_for(p); } + jbyte* byte_for(void* p) { return _ct_bs->byte_for(p); } + jbyte* byte_after(void* p) { return _ct_bs->byte_after(p); } + HeapWord* addr_for(jbyte* p) { return _ct_bs->addr_for(p); } bool is_prev_nonclean_card_val(jbyte v) { return diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/collectorPolicy.cpp --- a/src/share/vm/memory/collectorPolicy.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/collectorPolicy.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -31,11 +31,11 @@ if (PermSize > MaxPermSize) { MaxPermSize = PermSize; } - PermSize = align_size_down(PermSize, min_alignment()); + PermSize = MAX2(min_alignment(), align_size_down_(PermSize, min_alignment())); MaxPermSize = align_size_up(MaxPermSize, max_alignment()); - MinPermHeapExpansion = align_size_down(MinPermHeapExpansion, min_alignment()); - MaxPermHeapExpansion = align_size_down(MaxPermHeapExpansion, min_alignment()); + MinPermHeapExpansion = MAX2(min_alignment(), align_size_down_(MinPermHeapExpansion, min_alignment())); + MaxPermHeapExpansion = MAX2(min_alignment(), align_size_down_(MaxPermHeapExpansion, min_alignment())); MinHeapDeltaBytes = align_size_up(MinHeapDeltaBytes, min_alignment()); @@ -55,25 +55,21 @@ void CollectorPolicy::initialize_size_info() { // User inputs from -mx and ms are aligned - _initial_heap_byte_size = align_size_up(Arguments::initial_heap_size(), - min_alignment()); - set_min_heap_byte_size(align_size_up(Arguments::min_heap_size(), - min_alignment())); - 
set_max_heap_byte_size(align_size_up(MaxHeapSize, max_alignment())); - - // Check validity of heap parameters from launcher + set_initial_heap_byte_size(Arguments::initial_heap_size()); if (initial_heap_byte_size() == 0) { set_initial_heap_byte_size(NewSize + OldSize); - } else { - Universe::check_alignment(initial_heap_byte_size(), min_alignment(), - "initial heap"); } + set_initial_heap_byte_size(align_size_up(_initial_heap_byte_size, + min_alignment())); + + set_min_heap_byte_size(Arguments::min_heap_size()); if (min_heap_byte_size() == 0) { set_min_heap_byte_size(NewSize + OldSize); - } else { - Universe::check_alignment(min_heap_byte_size(), min_alignment(), - "initial heap"); } + set_min_heap_byte_size(align_size_up(_min_heap_byte_size, + min_alignment())); + + set_max_heap_byte_size(align_size_up(MaxHeapSize, max_alignment())); // Check heap parameter properties if (initial_heap_byte_size() < M) { @@ -121,8 +117,6 @@ int max_covered_regions) { switch (rem_set_name()) { case GenRemSet::CardTable: { - if (barrier_set_name() != BarrierSet::CardTableModRef) - vm_exit_during_initialization("Mismatch between RS and BS."); CardTableRS* res = new CardTableRS(whole_heap, max_covered_regions); return res; } @@ -345,7 +339,7 @@ // At this point all three sizes have been checked against the // maximum sizes but have not been checked for consistency - // amoung the three. + // among the three. // Final check min <= initial <= max set_min_gen0_size(MIN2(_min_gen0_size, _max_gen0_size)); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/collectorPolicy.hpp --- a/src/share/vm/memory/collectorPolicy.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/collectorPolicy.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -39,10 +39,12 @@ // Forward declarations. class GenCollectorPolicy; class TwoGenerationCollectorPolicy; +class AdaptiveSizePolicy; #ifndef SERIALGC class ConcurrentMarkSweepPolicy; +class G1CollectorPolicy; #endif // SERIALGC -class AdaptiveSizePolicy; + class GCPolicyCounters; class PermanentGenerationSpec; class MarkSweepPolicy; @@ -55,7 +57,7 @@ // Requires that the concrete subclass sets the alignment constraints // before calling. virtual void initialize_flags(); - virtual void initialize_size_info() = 0; + virtual void initialize_size_info(); // Initialize "_permanent_generation" to a spec for the given kind of // Perm Gen. void initialize_perm_generation(PermGen::Name pgnm); @@ -91,17 +93,18 @@ enum Name { CollectorPolicyKind, TwoGenerationCollectorPolicyKind, - TrainPolicyKind, ConcurrentMarkSweepPolicyKind, - ASConcurrentMarkSweepPolicyKind + ASConcurrentMarkSweepPolicyKind, + G1CollectorPolicyKind }; // Identification methods. - virtual GenCollectorPolicy* as_generation_policy() { return NULL; } + virtual GenCollectorPolicy* as_generation_policy() { return NULL; } virtual TwoGenerationCollectorPolicy* as_two_generation_policy() { return NULL; } virtual MarkSweepPolicy* as_mark_sweep_policy() { return NULL; } #ifndef SERIALGC virtual ConcurrentMarkSweepPolicy* as_concurrent_mark_sweep_policy() { return NULL; } + virtual G1CollectorPolicy* as_g1_policy() { return NULL; } #endif // SERIALGC // Note that these are not virtual. 
bool is_generation_policy() { return as_generation_policy() != NULL; } @@ -109,10 +112,13 @@ bool is_mark_sweep_policy() { return as_mark_sweep_policy() != NULL; } #ifndef SERIALGC bool is_concurrent_mark_sweep_policy() { return as_concurrent_mark_sweep_policy() != NULL; } + bool is_g1_policy() { return as_g1_policy() != NULL; } #else // SERIALGC bool is_concurrent_mark_sweep_policy() { return false; } + bool is_g1_policy() { return false; } #endif // SERIALGC + virtual PermanentGenerationSpec *permanent_generation() { assert(_permanent_generation != NULL, "Sanity check"); return _permanent_generation; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/defNewGeneration.cpp --- a/src/share/vm/memory/defNewGeneration.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/defNewGeneration.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -214,20 +214,26 @@ MemRegion fromMR((HeapWord*)from_start, (HeapWord*)to_start); MemRegion toMR ((HeapWord*)to_start, (HeapWord*)to_end); - eden()->initialize(edenMR, (minimum_eden_size == 0)); - // If minumum_eden_size != 0, we will not have cleared any - // portion of eden above its top. This can cause newly - // expanded space not to be mangled if using ZapUnusedHeapArea. - // We explicitly do such mangling here. - if (ZapUnusedHeapArea && (minimum_eden_size != 0)) { - eden()->mangle_unused_area(); + eden()->set_bounds(edenMR); + if (minimum_eden_size == 0) { + // The "minimum_eden_size" is really the amount of eden occupied by + // allocated objects -- if this is zero, then we can clear the space. + eden()->clear(); + } else { + // Otherwise, we will not have cleared eden. This can cause newly + // expanded space not to be mangled if using ZapUnusedHeapArea. + // We explicitly do such mangling here. + if (ZapUnusedHeapArea) { + eden()->mangle_unused_area(); + } } - from()->initialize(fromMR, true); - to()->initialize(toMR , true); - eden()->set_next_compaction_space(from()); + from()->set_bounds(fromMR); from()->clear(); + to()->set_bounds(toMR); to()->clear(); + // Make sure we compact eden, then from. // The to-space is normally empty before a compaction so need // not be considered. The exception is during promotion // failure handling when to-space can contain live objects. + eden()->set_next_compaction_space(from()); from()->set_next_compaction_space(NULL); } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/genCollectedHeap.hpp --- a/src/share/vm/memory/genCollectedHeap.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/genCollectedHeap.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -252,6 +252,21 @@ virtual size_t unsafe_max_tlab_alloc(Thread* thr) const; virtual HeapWord* allocate_new_tlab(size_t size); + // Can a compiler initialize a new object without store barriers? + // This permission only extends from the creation of a new object + // via a TLAB up to the first subsequent safepoint. + virtual bool can_elide_tlab_store_barriers() const { + return true; + } + + // Can a compiler elide a store barrier when it writes + // a permanent oop into the heap? Applies when the compiler + // is storing x to the heap, where x->is_perm() is true. + virtual bool can_elide_permanent_oop_store_barriers() const { + // CMS needs to see all, even intra-generational, ref updates. + return !UseConcMarkSweepGC; + } + // The "requestor" generation is performing some garbage collection // action for which it would be useful to have scratch space. 
The // requestor promises to allocate no more than "max_alloc_words" in any diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/genMarkSweep.hpp --- a/src/share/vm/memory/genMarkSweep.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/genMarkSweep.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -24,6 +24,7 @@ class GenMarkSweep : public MarkSweep { friend class VM_MarkSweep; + friend class G1MarkSweep; public: static void invoke_at_safepoint(int level, ReferenceProcessor* rp, bool clear_all_softrefs); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/genOopClosures.hpp --- a/src/share/vm/memory/genOopClosures.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/genOopClosures.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -56,6 +56,9 @@ // pointers must call the method below. template void do_barrier(T* p); + // Version for use by closures that may be called in parallel code. + void par_do_barrier(oop* p); + public: OopsInGenClosure() : OopClosure(NULL), _orig_gen(NULL), _gen(NULL), _gen_boundary(NULL), _rs(NULL) {}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/genOopClosures.inline.hpp --- a/src/share/vm/memory/genOopClosures.inline.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/genOopClosures.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -48,6 +48,16 @@ } } +inline void OopsInGenClosure::par_do_barrier(oop* p) { + assert(generation()->is_in_reserved(p), "expected ref in generation"); + oop obj = *p; + assert(obj != NULL, "expected non-null object"); + // If p points to a younger generation, mark the card. + if ((HeapWord*)obj < gen_boundary()) { + rs()->write_ref_field_gc_par(p, obj); + } +} + // NOTE! Any changes made here should also be made // in FastScanClosure::do_oop_work() template inline void ScanClosure::do_oop_work(T* p) { diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/genRemSet.hpp --- a/src/share/vm/memory/genRemSet.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/genRemSet.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -42,6 +42,7 @@ }; GenRemSet(BarrierSet * bs) : _bs(bs) {} + GenRemSet() : _bs(NULL) {} virtual Name rs_kind() = 0; @@ -53,6 +54,9 @@ // Return the barrier set associated with "this." BarrierSet* bs() { return _bs; } + // Set the barrier set. + void set_bs(BarrierSet* bs) { _bs = bs; } + // Do any (sequential) processing necessary to prepare for (possibly // "parallel", if that arg is true) calls to younger_refs_iterate. virtual void prepare_for_younger_refs_iterate(bool parallel) = 0; @@ -116,7 +120,10 @@ // Informs the RS that refs in the given "mr" may have changed // arbitrarily, and therefore may contain old-to-young pointers. - virtual void invalidate(MemRegion mr) = 0; + // If "whole heap" is true, then this invalidation is part of an + // invalidation of the whole heap, which an implementation might + // handle differently than that of a sub-part of the heap. 
+ virtual void invalidate(MemRegion mr, bool whole_heap = false) = 0; // Informs the RS that refs in this generation // may have changed arbitrarily, and therefore may contain diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/heapInspection.cpp --- a/src/share/vm/memory/heapInspection.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/heapInspection.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -238,11 +238,14 @@ HeapWord* ref; CollectedHeap* heap = Universe::heap(); + bool is_shared_heap = false; switch (heap->kind()) { + case CollectedHeap::G1CollectedHeap: case CollectedHeap::GenCollectedHeap: { - GenCollectedHeap* gch = (GenCollectedHeap*)heap; - gch->gc_prologue(false /* !full */); // get any necessary locks - ref = gch->perm_gen()->used_region().start(); + is_shared_heap = true; + SharedHeap* sh = (SharedHeap*)heap; + sh->gc_prologue(false /* !full */); // get any necessary locks, etc. + ref = sh->perm_gen()->used_region().start(); break; } #ifndef SERIALGC @@ -284,9 +287,9 @@ } st->flush(); - if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) { - GenCollectedHeap* gch = GenCollectedHeap::heap(); - gch->gc_epilogue(false /* !full */); // release all acquired locks + if (is_shared_heap) { + SharedHeap* sh = (SharedHeap*)heap; + sh->gc_epilogue(false /* !full */); // release all acquired locks, etc. } } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/iterator.hpp --- a/src/share/vm/memory/iterator.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/iterator.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -26,9 +26,23 @@ class ReferenceProcessor; +// Closure provides abortability. + +class Closure : public StackObj { + protected: + bool _abort; + void set_abort() { _abort = true; } + public: + Closure() : _abort(false) {} + // A subtype can use this mechanism to indicate to some iterator mapping + // functions that the iteration should cease. + bool abort() { return _abort; } + void clear_abort() { _abort = false; } +}; + // OopClosure is used for iterating through roots (oop*) -class OopClosure : public StackObj { +class OopClosure : public Closure { public: ReferenceProcessor* _ref_processor; OopClosure(ReferenceProcessor* rp) : _ref_processor(rp) { } @@ -55,11 +69,16 @@ Prefetch::style prefetch_style() { // Note that this is non-virtual. return Prefetch::do_none; } + + // True iff this closure may be safely applied more than once to an oop + // location without an intervening "major reset" (like the end of a GC). + virtual bool idempotent() { return false; } + virtual bool apply_to_weak_ref_discovered_field() { return false; } }; // ObjectClosure is used for iterating through an object space -class ObjectClosure : public StackObj { +class ObjectClosure : public Closure { public: // Called for each object. virtual void do_object(oop obj) = 0; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/modRefBarrierSet.hpp --- a/src/share/vm/memory/modRefBarrierSet.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/modRefBarrierSet.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -31,6 +31,13 @@ class ModRefBarrierSet: public BarrierSet { public: + + ModRefBarrierSet() { _kind = BarrierSet::ModRef; } + + bool is_a(BarrierSet::Name bsn) { + return bsn == BarrierSet::ModRef; + } + // Barriers only on ref writes. bool has_read_ref_barrier() { return false; } bool has_read_prim_barrier() { return false; } @@ -85,8 +92,10 @@ bool clear = false, bool before_save_marks = false) = 0; - // Causes all refs in "mr" to be assumed to be modified. 
- virtual void invalidate(MemRegion mr) = 0; + // Causes all refs in "mr" to be assumed to be modified. If "whole_heap" + // is true, the caller asserts that the entire heap is being invalidated, + // which may admit an optimized implementation for some barriers. + virtual void invalidate(MemRegion mr, bool whole_heap = false) = 0; // The caller guarantees that "mr" contains no references. (Perhaps it's // objects have been moved elsewhere.) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/referenceProcessor.cpp --- a/src/share/vm/memory/referenceProcessor.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/referenceProcessor.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -91,7 +91,8 @@ bool mt_discovery, BoolObjectClosure* is_alive_non_header, int parallel_gc_threads, - bool mt_processing) { + bool mt_processing, + bool dl_needs_barrier) { int mt_degree = 1; if (parallel_gc_threads > 1) { mt_degree = parallel_gc_threads; @@ -99,7 +100,8 @@ ReferenceProcessor* rp = new ReferenceProcessor(span, atomic_discovery, mt_discovery, mt_degree, - mt_processing && (parallel_gc_threads > 0)); + mt_processing && (parallel_gc_threads > 0), + dl_needs_barrier); if (rp == NULL) { vm_exit_during_initialization("Could not allocate ReferenceProcessor object"); } @@ -111,10 +113,13 @@ bool atomic_discovery, bool mt_discovery, int mt_degree, - bool mt_processing) : + bool mt_processing, + bool discovered_list_needs_barrier) : _discovering_refs(false), _enqueuing_is_done(false), _is_alive_non_header(NULL), + _discovered_list_needs_barrier(discovered_list_needs_barrier), + _bs(NULL), _processing_is_mt(mt_processing), _next_id(0) { @@ -135,6 +140,10 @@ _discoveredSoftRefs[i].set_head(sentinel_ref()); _discoveredSoftRefs[i].set_length(0); } + // If we do barreirs, cache a copy of the barrier set. + if (discovered_list_needs_barrier) { + _bs = Universe::heap()->barrier_set(); + } } #ifndef PRODUCT @@ -727,10 +736,15 @@ refs_list.set_length(0); } -void -ReferenceProcessor::abandon_partial_discovered_list_arr(DiscoveredList refs_lists[]) { - for (int i = 0; i < _num_q; i++) { - abandon_partial_discovered_list(refs_lists[i]); +void ReferenceProcessor::abandon_partial_discovery() { + // loop over the lists + for (int i = 0; i < _num_q * subclasses_of_ref; i++) { + if (TraceReferenceGC && PrintGCDetails && ((i % _num_q) == 0)) { + gclog_or_tty->print_cr( + "\nAbandoning %s discovered list", + list_name(i)); + } + abandon_partial_discovered_list(_discoveredSoftRefs[i]); } } @@ -994,7 +1008,16 @@ assert(_discovery_is_mt, "!_discovery_is_mt should have been handled by caller"); // First we must make sure this object is only enqueued once. CAS in a non null // discovered_addr. - oop retest = oopDesc::atomic_compare_exchange_oop(refs_list.head(), discovered_addr, + oop current_head = refs_list.head(); + + // Note: In the case of G1, this pre-barrier is strictly + // not necessary because the only case we are interested in + // here is when *discovered_addr is NULL, so this will expand to + // nothing. As a result, I am just manually eliding this out for G1. + if (_discovered_list_needs_barrier && !UseG1GC) { + _bs->write_ref_field_pre((void*)discovered_addr, current_head); guarantee(false, "Needs to be fixed: YSR"); + } + oop retest = oopDesc::atomic_compare_exchange_oop(current_head, discovered_addr, NULL); if (retest == NULL) { // This thread just won the right to enqueue the object. @@ -1002,6 +1025,10 @@ // is necessary. 
refs_list.set_head(obj); refs_list.set_length(refs_list.length() + 1); + if (_discovered_list_needs_barrier) { + _bs->write_ref_field((void*)discovered_addr, current_head); guarantee(false, "Needs to be fixed: YSR"); + } + } else { // If retest was non NULL, another thread beat us to it: // The reference has already been discovered... @@ -1073,8 +1100,8 @@ } } - HeapWord* discovered_addr = java_lang_ref_Reference::discovered_addr(obj); - oop discovered = java_lang_ref_Reference::discovered(obj); + HeapWord* const discovered_addr = java_lang_ref_Reference::discovered_addr(obj); + const oop discovered = java_lang_ref_Reference::discovered(obj); assert(discovered->is_oop_or_null(), "bad discovered field"); if (discovered != NULL) { // The reference has already been discovered... @@ -1094,7 +1121,7 @@ // discovered twice except by concurrent collectors that potentially // trace the same Reference object twice. assert(UseConcMarkSweepGC, - "Only possible with a concurrent collector"); + "Only possible with an incremental-update concurrent collector"); return true; } } @@ -1122,12 +1149,24 @@ return false; // nothing special needs to be done } - // We do a raw store here, the field will be visited later when - // processing the discovered references. if (_discovery_is_mt) { add_to_discovered_list_mt(*list, obj, discovered_addr); } else { - oop_store_raw(discovered_addr, list->head()); + // If "_discovered_list_needs_barrier", we do write barriers when + // updating the discovered reference list. Otherwise, we do a raw store + // here: the field will be visited later when processing the discovered + // references. + oop current_head = list->head(); + // As in the case further above, since we are over-writing a NULL + // pre-value, we can safely elide the pre-barrier here for the case of G1. + assert(discovered == NULL, "control point invariant"); + if (_discovered_list_needs_barrier && !UseG1GC) { // safe to elide for G1 + _bs->write_ref_field_pre((oop*)discovered_addr, current_head); + } + oop_store_raw(discovered_addr, current_head); + if (_discovered_list_needs_barrier) { + _bs->write_ref_field((oop*)discovered_addr, current_head); + } list->set_head(obj); list->set_length(list->length() + 1); } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/referenceProcessor.hpp --- a/src/share/vm/memory/referenceProcessor.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/referenceProcessor.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -54,6 +54,14 @@ bool _discovery_is_atomic; // if discovery is atomic wrt // other collectors in configuration bool _discovery_is_mt; // true if reference discovery is MT. + // If true, setting "next" field of a discovered refs list requires + // write barrier(s). (Must be true if used in a collector in which + // elements of a discovered list may be moved during discovery: for + // example, a collector like Garbage-First that moves objects during a + // long-term concurrent marking phase that does weak reference + // discovery.) + bool _discovered_list_needs_barrier; + BarrierSet* _bs; // Cached copy of BarrierSet. bool _enqueuing_is_done; // true if all weak references enqueued bool _processing_is_mt; // true during phases when // reference processing is MT. @@ -196,7 +204,6 @@ void verify_ok_to_handle_reflists() PRODUCT_RETURN; void abandon_partial_discovered_list(DiscoveredList& refs_list); - void abandon_partial_discovered_list_arr(DiscoveredList refs_lists[]); // Calculate the number of jni handles. 
unsigned int count_jni_refs(); @@ -217,6 +224,8 @@ _discovery_is_atomic(true), _enqueuing_is_done(false), _discovery_is_mt(false), + _discovered_list_needs_barrier(false), + _bs(NULL), _is_alive_non_header(NULL), _num_q(0), _processing_is_mt(false), @@ -224,8 +233,10 @@ {} ReferenceProcessor(MemRegion span, bool atomic_discovery, - bool mt_discovery, int mt_degree = 1, - bool mt_processing = false); + bool mt_discovery, + int mt_degree = 1, + bool mt_processing = false, + bool discovered_list_needs_barrier = false); // Allocates and initializes a reference processor. static ReferenceProcessor* create_ref_processor( @@ -234,8 +245,8 @@ bool mt_discovery, BoolObjectClosure* is_alive_non_header = NULL, int parallel_gc_threads = 1, - bool mt_processing = false); - + bool mt_processing = false, + bool discovered_list_needs_barrier = false); // RefDiscoveryPolicy values enum { ReferenceBasedDiscovery = 0, @@ -296,6 +307,11 @@ // Enqueue references at end of GC (called by the garbage collector) bool enqueue_discovered_references(AbstractRefProcTaskExecutor* task_executor = NULL); + // If a discovery is in process that is being superceded, abandon it: all + // the discovered lists will be empty, and all the objects on them will + // have NULL discovered fields. Must be called only at a safepoint. + void abandon_partial_discovery(); + // debugging void verify_no_references_recorded() PRODUCT_RETURN; static void verify(); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/sharedHeap.cpp --- a/src/share/vm/memory/sharedHeap.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/sharedHeap.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -57,15 +57,24 @@ } _sh = this; // ch is static, should be set only once. if ((UseParNewGC || - (UseConcMarkSweepGC && CMSParallelRemarkEnabled)) && + (UseConcMarkSweepGC && CMSParallelRemarkEnabled) || + UseG1GC) && ParallelGCThreads > 0) { - _workers = new WorkGang("Parallel GC Threads", ParallelGCThreads, true); + _workers = new WorkGang("Parallel GC Threads", ParallelGCThreads, + /* are_GC_task_threads */true, + /* are_ConcurrentGC_threads */false); if (_workers == NULL) { vm_exit_during_initialization("Failed necessary allocation."); } } } +bool SharedHeap::heap_lock_held_for_gc() { + Thread* t = Thread::current(); + return Heap_lock->owned_by_self() + || ( (t->is_GC_task_thread() || t->is_VM_thread()) + && _thread_holds_heap_lock_for_gc); +} void SharedHeap::set_par_threads(int t) { _n_par_threads = t; @@ -280,10 +289,11 @@ } // Some utilities. -void SharedHeap::print_size_transition(size_t bytes_before, +void SharedHeap::print_size_transition(outputStream* out, + size_t bytes_before, size_t bytes_after, size_t capacity) { - tty->print(" %d%s->%d%s(%d%s)", + out->print(" %d%s->%d%s(%d%s)", byte_size_in_proper_unit(bytes_before), proper_unit_for_byte_size(bytes_before), byte_size_in_proper_unit(bytes_after), diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/sharedHeap.hpp --- a/src/share/vm/memory/sharedHeap.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/sharedHeap.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -44,6 +44,9 @@ class SharedHeap : public CollectedHeap { friend class VMStructs; + friend class VM_GC_Operation; + friend class VM_CGC_Operation; + private: // For claiming strong_roots tasks. SubTasksDone* _process_strong_tasks; @@ -82,6 +85,14 @@ // function. 
SharedHeap(CollectorPolicy* policy_); + // Returns true if the calling thread holds the heap lock, + // or the calling thread is a par gc thread and the heap_lock is held + // by the vm thread doing a gc operation. + bool heap_lock_held_for_gc(); + // True if the heap_lock is held by the a non-gc thread invoking a gc + // operation. + bool _thread_holds_heap_lock_for_gc; + public: static SharedHeap* heap() { return _sh; } @@ -97,8 +108,8 @@ void set_perm(PermGen* perm_gen) { _perm_gen = perm_gen; } - // A helper function that fills an allocated-but-not-yet-initialized - // region with a garbage object. + // A helper function that fills a region of the heap with + // with a single object. static void fill_region_with_object(MemRegion mr); // Minimum garbage fill object size @@ -214,13 +225,12 @@ // "SharedHeap" can use in the implementation of its virtual // functions. -protected: +public: // Do anything common to GC's. virtual void gc_prologue(bool full) = 0; virtual void gc_epilogue(bool full) = 0; -public: // // New methods from CollectedHeap // @@ -266,7 +276,8 @@ } // Some utilities. - void print_size_transition(size_t bytes_before, + void print_size_transition(outputStream* out, + size_t bytes_before, size_t bytes_after, size_t capacity); }; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/space.cpp --- a/src/share/vm/memory/space.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/space.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -105,7 +105,7 @@ "Only ones we deal with for now."); assert(_precision != CardTableModRefBS::ObjHeadPreciseArray || - _last_bottom == NULL || + _cl->idempotent() || _last_bottom == NULL || top <= _last_bottom, "Not decreasing"); NOT_PRODUCT(_last_bottom = mr.start()); @@ -144,7 +144,14 @@ walk_mem_region(mr, bottom_obj, top); } - _min_done = bottom; + // An idempotent closure might be applied in any order, so we don't + // record a _min_done for it. + if (!_cl->idempotent()) { + _min_done = bottom; + } else { + assert(_min_done == _last_explicit_min_done, + "Don't update _min_done for idempotent cl"); + } } DirtyCardToOopClosure* Space::new_dcto_cl(OopClosure* cl, @@ -232,13 +239,17 @@ return new ContiguousSpaceDCTOC(this, cl, precision, boundary); } -void Space::initialize(MemRegion mr, bool clear_space) { +void Space::set_bounds(MemRegion mr) { HeapWord* bottom = mr.start(); HeapWord* end = mr.end(); assert(Universe::on_page_boundary(bottom) && Universe::on_page_boundary(end), "invalid space boundaries"); set_bottom(bottom); set_end(end); +} + +void Space::initialize(MemRegion mr, bool clear_space) { + set_bounds(mr); if (clear_space) clear(); } @@ -246,20 +257,35 @@ if (ZapUnusedHeapArea) mangle_unused_area(); } -void ContiguousSpace::initialize(MemRegion mr, bool clear_space) -{ - CompactibleSpace::initialize(mr, clear_space); - _concurrent_iteration_safe_limit = top(); +void CompactibleSpace::initialize(MemRegion mr, bool clear_space) { + Space::initialize(mr, false); // We'll do the clearing if there's + // clearing to be done. + _compaction_top = bottom(); + _next_compaction_space = NULL; + if (clear_space) clear(); +} + +void CompactibleSpace::clear() { + _compaction_top = bottom(); + Space::clear(); +} + +void ContiguousSpace::initialize(MemRegion mr, bool clear_space) { + CompactibleSpace::initialize(mr, false); // We'll do the clearing if there's + // clearing to be done. 
+ set_top(bottom()); + set_saved_mark(); + if (clear_space) clear(); } void ContiguousSpace::clear() { set_top(bottom()); set_saved_mark(); - Space::clear(); + CompactibleSpace::clear(); } bool Space::is_in(const void* p) const { - HeapWord* b = block_start(p); + HeapWord* b = block_start_const(p); return b != NULL && block_is_obj(b); } @@ -271,8 +297,17 @@ return p >= _top; } +void OffsetTableContigSpace::initialize(MemRegion mr, bool clear_space) { + // false ==> we'll do the clearing if there's clearing to be done. + ContiguousSpace::initialize(mr, false); + _offsets.zero_bottom_entry(); + _offsets.initialize_threshold(); + if (clear_space) clear(); +} + void OffsetTableContigSpace::clear() { ContiguousSpace::clear(); + _offsets.zero_bottom_entry(); _offsets.initialize_threshold(); } @@ -297,12 +332,6 @@ debug_only(Copy::fill_to_words(mr.start(), mr.word_size(), badHeapWord)); } -void CompactibleSpace::initialize(MemRegion mr, bool clear_space) { - Space::initialize(mr, clear_space); - _compaction_top = bottom(); - _next_compaction_space = NULL; -} - HeapWord* CompactibleSpace::forward(oop q, size_t size, CompactPoint* cp, HeapWord* compact_top) { // q is alive @@ -477,8 +506,8 @@ } guarantee(p == top(), "end of last object must match end of space"); if (top() != end()) { - guarantee(top() == block_start(end()-1) && - top() == block_start(top()), + guarantee(top() == block_start_const(end()-1) && + top() == block_start_const(top()), "top should be start of unallocated block, if it exists"); } } @@ -710,7 +739,7 @@ #undef ContigSpace_OOP_SINCE_SAVE_MARKS_DEFN // Very general, slow implementation. -HeapWord* ContiguousSpace::block_start(const void* p) const { +HeapWord* ContiguousSpace::block_start_const(const void* p) const { assert(MemRegion(bottom(), end()).contains(p), "p not in space"); if (p >= top()) { return top(); @@ -913,7 +942,8 @@ // For a sampling of objects in the space, find it using the // block offset table. if (blocks == BLOCK_SAMPLE_INTERVAL) { - guarantee(p == block_start(p + (size/2)), "check offset computation"); + guarantee(p == block_start_const(p + (size/2)), + "check offset computation"); blocks = 0; } else { blocks++; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/space.hpp --- a/src/share/vm/memory/space.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/space.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -105,7 +105,7 @@ virtual void set_bottom(HeapWord* value) { _bottom = value; } virtual void set_end(HeapWord* value) { _end = value; } - HeapWord* saved_mark_word() const { return _saved_mark_word; } + virtual HeapWord* saved_mark_word() const { return _saved_mark_word; } void set_saved_mark_word(HeapWord* p) { _saved_mark_word = p; } MemRegionClosure* preconsumptionDirtyCardClosure() const { @@ -131,8 +131,18 @@ return MemRegion(bottom(), saved_mark_word()); } - // Initialization + // Initialization. + // "initialize" should be called once on a space, before it is used for + // any purpose. The "mr" arguments gives the bounds of the space, and + // the "clear_space" argument should be true unless the memory in "mr" is + // known to be zeroed. virtual void initialize(MemRegion mr, bool clear_space); + + // Sets the bounds (bottom and end) of the current space to those of "mr." + void set_bounds(MemRegion mr); + + // The "clear" method must be called on a region that may have + // had allocation performed in it, but is now to be considered empty. virtual void clear(); // For detecting GC bugs. 
Should only be called at GC boundaries, since @@ -216,7 +226,13 @@ // "block" that contains "p". We say "block" instead of "object" since // some heaps may not pack objects densely; a chunk may either be an // object or a non-object. If "p" is not in the space, return NULL. - virtual HeapWord* block_start(const void* p) const = 0; + virtual HeapWord* block_start_const(const void* p) const = 0; + + // The non-const version may have benevolent side effects on the data + // structure supporting these calls, possibly speeding up future calls. + // The default implementation, however, is simply to call the const + // version. + inline virtual HeapWord* block_start(const void* p); // Requires "addr" to be the start of a chunk, and returns its size. // "addr + size" is required to be the start of a new chunk, or the end @@ -282,12 +298,13 @@ CardTableModRefBS::PrecisionStyle _precision; HeapWord* _boundary; // If non-NULL, process only non-NULL oops // pointing below boundary. - HeapWord* _min_done; // ObjHeadPreciseArray precision requires + HeapWord* _min_done; // ObjHeadPreciseArray precision requires // a downwards traversal; this is the // lowest location already done (or, // alternatively, the lowest address that // shouldn't be done again. NULL means infinity.) NOT_PRODUCT(HeapWord* _last_bottom;) + NOT_PRODUCT(HeapWord* _last_explicit_min_done;) // Get the actual top of the area on which the closure will // operate, given where the top is assumed to be (the end of the @@ -311,13 +328,15 @@ HeapWord* boundary) : _sp(sp), _cl(cl), _precision(precision), _boundary(boundary), _min_done(NULL) { - NOT_PRODUCT(_last_bottom = NULL;) + NOT_PRODUCT(_last_bottom = NULL); + NOT_PRODUCT(_last_explicit_min_done = NULL); } void do_MemRegion(MemRegion mr); void set_min_done(HeapWord* min_done) { _min_done = min_done; + NOT_PRODUCT(_last_explicit_min_done = _min_done); } #ifndef PRODUCT void set_last_bottom(HeapWord* last_bottom) { @@ -355,6 +374,7 @@ public: virtual void initialize(MemRegion mr, bool clear_space); + virtual void clear(); // Used temporarily during a compaction phase to hold the value // top should have when compaction is complete. @@ -511,7 +531,7 @@ /* prefetch beyond q */ \ Prefetch::write(q, interval); \ /* size_t size = oop(q)->size(); changing this for cms for perm gen */\ - size_t size = block_size(q); \ + size_t size = block_size(q); \ compact_top = cp->space->forward(oop(q), size, cp, compact_top); \ q += size; \ end_of_live = q; \ @@ -575,68 +595,68 @@ cp->space->set_compaction_top(compact_top); \ } -#define SCAN_AND_ADJUST_POINTERS(adjust_obj_size) { \ - /* adjust all the interior pointers to point at the new locations of objects \ - * Used by MarkSweep::mark_sweep_phase3() */ \ +#define SCAN_AND_ADJUST_POINTERS(adjust_obj_size) { \ + /* adjust all the interior pointers to point at the new locations of objects \ + * Used by MarkSweep::mark_sweep_phase3() */ \ \ - HeapWord* q = bottom(); \ - HeapWord* t = _end_of_live; /* Established by "prepare_for_compaction". */ \ + HeapWord* q = bottom(); \ + HeapWord* t = _end_of_live; /* Established by "prepare_for_compaction". */ \ \ - assert(_first_dead <= _end_of_live, "Stands to reason, no?"); \ + assert(_first_dead <= _end_of_live, "Stands to reason, no?"); \ \ - if (q < t && _first_dead > q && \ + if (q < t && _first_dead > q && \ !oop(q)->is_gc_marked()) { \ /* we have a chunk of the space which hasn't moved and we've \ * reinitialized the mark word during the previous pass, so we can't \ - * use is_gc_marked for the traversal. 
*/ \ + * use is_gc_marked for the traversal. */ \ HeapWord* end = _first_dead; \ \ - while (q < end) { \ - /* I originally tried to conjoin "block_start(q) == q" to the \ - * assertion below, but that doesn't work, because you can't \ - * accurately traverse previous objects to get to the current one \ - * after their pointers (including pointers into permGen) have been \ - * updated, until the actual compaction is done. dld, 4/00 */ \ - assert(block_is_obj(q), \ - "should be at block boundaries, and should be looking at objs"); \ + while (q < end) { \ + /* I originally tried to conjoin "block_start(q) == q" to the \ + * assertion below, but that doesn't work, because you can't \ + * accurately traverse previous objects to get to the current one \ + * after their pointers (including pointers into permGen) have been \ + * updated, until the actual compaction is done. dld, 4/00 */ \ + assert(block_is_obj(q), \ + "should be at block boundaries, and should be looking at objs"); \ \ VALIDATE_MARK_SWEEP_ONLY(MarkSweep::track_interior_pointers(oop(q))); \ \ - /* point all the oops to the new location */ \ - size_t size = oop(q)->adjust_pointers(); \ - size = adjust_obj_size(size); \ + /* point all the oops to the new location */ \ + size_t size = oop(q)->adjust_pointers(); \ + size = adjust_obj_size(size); \ \ VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers()); \ - \ + \ VALIDATE_MARK_SWEEP_ONLY(MarkSweep::validate_live_oop(oop(q), size)); \ - \ + \ q += size; \ - } \ + } \ \ - if (_first_dead == t) { \ - q = t; \ - } else { \ - /* $$$ This is funky. Using this to read the previously written \ - * LiveRange. See also use below. */ \ + if (_first_dead == t) { \ + q = t; \ + } else { \ + /* $$$ This is funky. Using this to read the previously written \ + * LiveRange. See also use below. */ \ q = (HeapWord*)oop(_first_dead)->mark()->decode_pointer(); \ - } \ - } \ + } \ + } \ \ const intx interval = PrefetchScanIntervalInBytes; \ \ - debug_only(HeapWord* prev_q = NULL); \ - while (q < t) { \ - /* prefetch beyond q */ \ + debug_only(HeapWord* prev_q = NULL); \ + while (q < t) { \ + /* prefetch beyond q */ \ Prefetch::write(q, interval); \ - if (oop(q)->is_gc_marked()) { \ - /* q is alive */ \ + if (oop(q)->is_gc_marked()) { \ + /* q is alive */ \ VALIDATE_MARK_SWEEP_ONLY(MarkSweep::track_interior_pointers(oop(q))); \ - /* point all the oops to the new location */ \ - size_t size = oop(q)->adjust_pointers(); \ - size = adjust_obj_size(size); \ - VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers()); \ + /* point all the oops to the new location */ \ + size_t size = oop(q)->adjust_pointers(); \ + size = adjust_obj_size(size); \ + VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers()); \ VALIDATE_MARK_SWEEP_ONLY(MarkSweep::validate_live_oop(oop(q), size)); \ - debug_only(prev_q = q); \ + debug_only(prev_q = q); \ q += size; \ } else { \ /* q is not a live object, so its mark should point at the next \ @@ -716,6 +736,8 @@ } \ } \ \ + /* Let's remember if we were empty before we did the compaction. */ \ + bool was_empty = used_region().is_empty(); \ /* Reset space after compaction is complete */ \ reset_after_compaction(); \ /* We do this clear, below, since it has overloaded meanings for some */ \ @@ -723,8 +745,8 @@ /* compacted into will have had their offset table thresholds updated */ \ /* continuously, but those that weren't need to have their thresholds */ \ /* re-initialized. Also mangles unused area for debugging. 
*/ \ - if (is_empty()) { \ - clear(); \ + if (used_region().is_empty()) { \ + if (!was_empty) clear(); \ } else { \ if (ZapUnusedHeapArea) mangle_unused_area(); \ } \ @@ -750,8 +772,8 @@ HeapWord* top() const { return _top; } void set_top(HeapWord* value) { _top = value; } - void set_saved_mark() { _saved_mark_word = top(); } - void reset_saved_mark() { _saved_mark_word = bottom(); } + virtual void set_saved_mark() { _saved_mark_word = top(); } + void reset_saved_mark() { _saved_mark_word = bottom(); } virtual void clear(); @@ -843,7 +865,7 @@ virtual void object_iterate_from(WaterMark mark, ObjectClosure* blk); // Very inefficient implementation. - virtual HeapWord* block_start(const void* p) const; + virtual HeapWord* block_start_const(const void* p) const; size_t block_size(const HeapWord* p) const; // If a block is in the allocated area, it is an object. bool block_is_obj(const HeapWord* p) const { return p < top(); } @@ -1000,9 +1022,10 @@ void set_bottom(HeapWord* value); void set_end(HeapWord* value); + virtual void initialize(MemRegion mr, bool clear_space); void clear(); - inline HeapWord* block_start(const void* p) const; + inline HeapWord* block_start_const(const void* p) const; // Add offset table update. virtual inline HeapWord* allocate(size_t word_size); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/space.inline.hpp --- a/src/share/vm/memory/space.inline.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/space.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -22,6 +22,10 @@ * */ +inline HeapWord* Space::block_start(const void* p) { + return block_start_const(p); +} + inline HeapWord* OffsetTableContigSpace::allocate(size_t size) { HeapWord* res = ContiguousSpace::allocate(size); if (res != NULL) { @@ -50,7 +54,8 @@ return res; } -inline HeapWord* OffsetTableContigSpace::block_start(const void* p) const { +inline HeapWord* +OffsetTableContigSpace::block_start_const(const void* p) const { return _offsets.block_start(p); } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/specialized_oop_closures.hpp --- a/src/share/vm/memory/specialized_oop_closures.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/specialized_oop_closures.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -59,6 +59,12 @@ // This is split into several because of a Visual C++ 6.0 compiler bug // where very long macros cause the compiler to crash +// Some other heap might define further specialized closures. +#ifndef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES +#define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \ + /* None */ +#endif + #define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_S(f) \ f(ScanClosure,_nv) \ f(FastScanClosure,_nv) \ @@ -77,7 +83,7 @@ SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_P(f) #ifndef SERIALGC -#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(f) \ +#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f) \ f(MarkRefsIntoAndScanClosure,_nv) \ f(Par_MarkRefsIntoAndScanClosure,_nv) \ f(PushAndMarkClosure,_nv) \ @@ -85,11 +91,13 @@ f(PushOrMarkClosure,_nv) \ f(Par_PushOrMarkClosure,_nv) \ f(CMSKeepAliveClosure,_nv) \ - f(CMSInnerParMarkAndPushClosure,_nv) + f(CMSInnerParMarkAndPushClosure,_nv) \ + FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) #else // SERIALGC -#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(f) +#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f) #endif // SERIALGC + // We separate these out, because sometime the general one has // a different definition from the specialized ones, and sometimes it // doesn't. 
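For context on the specialized_oop_closures.hpp changes above: the renamed _CLOSURES_2 list now ends with the FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES hook, which defaults to empty, so another collector can append its own closure types without editing the shared list. A rough sketch of how a collector-specific header might use the hook; the closure names below are hypothetical placeholders for whatever non-virtual closures that collector defines.

// Sketch only -- not part of this changeset. A collector-specific header,
// processed before the default above is seen, could pre-define the hook:
class MyCollectorScanClosure;      // hypothetical closure with do_oop_nv()
class MyCollectorParScanClosure;   // hypothetical parallel variant

#define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \
  f(MyCollectorScanClosure,_nv)                         \
  f(MyCollectorParScanClosure,_nv)

// With the hook defined, SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f) -- and so
// ALL_OOP_OOP_ITERATE_CLOSURES_2(f) -- also expands over these types, so the
// oop_oop_iterate##nv_suffix (and the new oop_oop_iterate_backwards##nv_suffix)
// bodies are generated for them in instanceKlass.cpp and instanceRefKlass.cpp.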
@@ -98,8 +106,8 @@ f(OopClosure,_v) \ SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_1(f) -#define ALL_OOP_OOP_ITERATE_CLOSURES_3(f) \ - SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(f) +#define ALL_OOP_OOP_ITERATE_CLOSURES_2(f) \ + SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f) #ifndef SERIALGC // This macro applies an argument macro to all OopClosures for which we @@ -125,6 +133,13 @@ // The "root_class" is the most general class to define; this may be // "OopClosure" in some applications and "OopsInGenClosure" in others. + +// Some other heap might define further specialized closures. +#ifndef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES +#define FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(f) \ + /* None */ +#endif + #define SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES_YOUNG_S(f) \ f(ScanClosure,_nv) \ f(FastScanClosure,_nv) @@ -132,7 +147,8 @@ #ifndef SERIALGC #define SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES_YOUNG_P(f) \ f(ParScanWithBarrierClosure,_nv) \ - f(ParScanWithoutBarrierClosure,_nv) + f(ParScanWithoutBarrierClosure,_nv) \ + FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(f) #else // SERIALGC #define SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES_YOUNG_P(f) #endif // SERIALGC @@ -179,13 +195,15 @@ #if ENABLE_SPECIALIZATION_STATS private: - static int _numCallsAll; + static bool _init; + static bool _wrapped; + static jint _numCallsAll; - static int _numCallsTotal[NUM_Kinds]; - static int _numCalls_nv[NUM_Kinds]; + static jint _numCallsTotal[NUM_Kinds]; + static jint _numCalls_nv[NUM_Kinds]; - static int _numDoOopCallsTotal[NUM_Kinds]; - static int _numDoOopCalls_nv[NUM_Kinds]; + static jint _numDoOopCallsTotal[NUM_Kinds]; + static jint _numDoOopCalls_nv[NUM_Kinds]; public: #endif static void clear() PRODUCT_RETURN; @@ -203,22 +221,22 @@ #if ENABLE_SPECIALIZATION_STATS inline void SpecializationStats::record_call() { - _numCallsAll++;; + Atomic::inc(&_numCallsAll); } inline void SpecializationStats::record_iterate_call_v(Kind k) { - _numCallsTotal[k]++; + Atomic::inc(&_numCallsTotal[k]); } inline void SpecializationStats::record_iterate_call_nv(Kind k) { - _numCallsTotal[k]++; - _numCalls_nv[k]++; + Atomic::inc(&_numCallsTotal[k]); + Atomic::inc(&_numCalls_nv[k]); } inline void SpecializationStats::record_do_oop_call_v(Kind k) { - _numDoOopCallsTotal[k]++; + Atomic::inc(&_numDoOopCallsTotal[k]); } inline void SpecializationStats::record_do_oop_call_nv(Kind k) { - _numDoOopCallsTotal[k]++; - _numDoOopCalls_nv[k]++; + Atomic::inc(&_numDoOopCallsTotal[k]); + Atomic::inc(&_numDoOopCalls_nv[k]); } #else // !ENABLE_SPECIALIZATION_STATS diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/universe.cpp --- a/src/share/vm/memory/universe.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/universe.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -734,6 +734,15 @@ fatal("UseParallelGC not supported in java kernel vm."); #endif // SERIALGC + } else if (UseG1GC) { +#ifndef SERIALGC + G1CollectorPolicy* g1p = new G1CollectorPolicy_BestRegionsFirst(); + G1CollectedHeap* g1h = new G1CollectedHeap(g1p); + Universe::_collectedHeap = g1h; +#else // SERIALGC + fatal("UseG1GC not supported in java kernel vm."); +#endif // SERIALGC + } else { GenCollectorPolicy *gc_policy; @@ -933,7 +942,10 @@ // This needs to be done before the first scavenge/gc, since // it's an input to soft ref clearing policy. 
- Universe::update_heap_info_at_gc(); + { + MutexLocker x(Heap_lock); + Universe::update_heap_info_at_gc(); + } // ("weak") refs processing infrastructure initialization Universe::heap()->post_initialize(); @@ -1189,10 +1201,11 @@ // ???: What if a CollectedHeap doesn't have a permanent generation? ShouldNotReachHere(); break; - case CollectedHeap::GenCollectedHeap: { - GenCollectedHeap* gch = (GenCollectedHeap*) Universe::heap(); - permanent_reserved = gch->perm_gen()->reserved(); - break; + case CollectedHeap::GenCollectedHeap: + case CollectedHeap::G1CollectedHeap: { + SharedHeap* sh = (SharedHeap*) Universe::heap(); + permanent_reserved = sh->perm_gen()->reserved(); + break; } #ifndef SERIALGC case CollectedHeap::ParallelScavengeHeap: { diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/generateOopMap.cpp --- a/src/share/vm/oops/generateOopMap.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/generateOopMap.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -370,21 +370,8 @@ void GenerateOopMap ::initialize_bb() { _gc_points = 0; _bb_count = 0; - int size = binsToHold(method()->code_size()); - _bb_hdr_bits = NEW_RESOURCE_ARRAY(uintptr_t,size); - memset(_bb_hdr_bits, 0, size*sizeof(uintptr_t)); -} - -void GenerateOopMap ::set_bbmark_bit(int bci) { - int idx = bci >> LogBitsPerWord; - uintptr_t bit = (uintptr_t)1 << (bci & (BitsPerWord-1)); - _bb_hdr_bits[idx] |= bit; -} - -void GenerateOopMap ::clear_bbmark_bit(int bci) { - int idx = bci >> LogBitsPerWord; - uintptr_t bit = (uintptr_t)1 << (bci & (BitsPerWord-1)); - _bb_hdr_bits[idx] &= (~bit); + _bb_hdr_bits.clear(); + _bb_hdr_bits.resize(method()->code_size()); } void GenerateOopMap::bb_mark_fct(GenerateOopMap *c, int bci, int *data) { @@ -952,6 +939,17 @@ _basic_blocks[bbNo-1]._end_bci = prev_bci; + // Check that the correct number of basicblocks was found + if (bbNo !=_bb_count) { + if (bbNo < _bb_count) { + verify_error("jump into the middle of instruction?"); + return; + } else { + verify_error("extra basic blocks - should not happen?"); + return; + } + } + _max_monitors = monitor_count; // Now that we have a bound on the depth of the monitor stack, we can @@ -985,17 +983,6 @@ } #endif - // Check that the correct number of basicblocks was found - if (bbNo !=_bb_count) { - if (bbNo < _bb_count) { - verify_error("jump into the middle of instruction?"); - return; - } else { - verify_error("extra basic blocks - should not happen?"); - return; - } - } - // Mark all alive blocks mark_reachable_code(); } @@ -1022,21 +1009,22 @@ int new_method_size) { assert(new_method_size >= method()->code_size() + delta, "new method size is too small"); - int newWords = binsToHold(new_method_size); - uintptr_t * new_bb_hdr_bits = NEW_RESOURCE_ARRAY(uintptr_t, newWords); + BitMap::bm_word_t* new_bb_hdr_bits = + NEW_RESOURCE_ARRAY(BitMap::bm_word_t, + BitMap::word_align_up(new_method_size)); + _bb_hdr_bits.set_map(new_bb_hdr_bits); + _bb_hdr_bits.set_size(new_method_size); + _bb_hdr_bits.clear(); - BitMap bb_bits(new_bb_hdr_bits, new_method_size); - bb_bits.clear(); for(int k = 0; k < _bb_count; k++) { if (_basic_blocks[k]._bci > bci) { _basic_blocks[k]._bci += delta; _basic_blocks[k]._end_bci += delta; } - bb_bits.at_put(_basic_blocks[k]._bci, true); + _bb_hdr_bits.at_put(_basic_blocks[k]._bci, true); } - _bb_hdr_bits = new_bb_hdr_bits ; } // diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/generateOopMap.hpp --- a/src/share/vm/oops/generateOopMap.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/generateOopMap.hpp Thu Jun 05 15:57:56 
2008 -0700 @@ -341,16 +341,22 @@ BasicBlock * _basic_blocks; // Array of basicblock info int _gc_points; int _bb_count; - uintptr_t * _bb_hdr_bits; + BitMap _bb_hdr_bits; // Basicblocks methods void initialize_bb (); void mark_bbheaders_and_count_gc_points(); - bool is_bb_header (int bci) const { return (_bb_hdr_bits[bci >> LogBitsPerWord] & ((uintptr_t)1 << (bci & (BitsPerWord-1)))) != 0; } + bool is_bb_header (int bci) const { + return _bb_hdr_bits.at(bci); + } int gc_points () const { return _gc_points; } int bb_count () const { return _bb_count; } - void set_bbmark_bit (int bci); - void clear_bbmark_bit (int bci); + void set_bbmark_bit (int bci) { + _bb_hdr_bits.at_put(bci, true); + } + void clear_bbmark_bit (int bci) { + _bb_hdr_bits.at_put(bci, false); + } BasicBlock * get_basic_block_at (int bci) const; BasicBlock * get_basic_block_containing (int bci) const; void interp_bb (BasicBlock *bb); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/instanceKlass.cpp --- a/src/share/vm/oops/instanceKlass.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/instanceKlass.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1515,10 +1515,9 @@ // closure's do_header() method dicates whether the given closure should be // applied to the klass ptr in the object header. -#define InstanceKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ - \ -int instanceKlass::oop_oop_iterate##nv_suffix(oop obj, \ - OopClosureType* closure) {\ +#define InstanceKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ + \ +int instanceKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) { \ SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::ik);\ /* header */ \ if (closure->do_header()) { \ @@ -1533,6 +1532,26 @@ return size_helper(); \ } +#ifndef SERIALGC +#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \ + \ +int instanceKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, \ + OopClosureType* closure) { \ + SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::ik); \ + /* header */ \ + if (closure->do_header()) { \ + obj->oop_iterate_header(closure); \ + } \ + /* instance variables */ \ + InstanceKlass_OOP_MAP_REVERSE_ITERATE( \ + obj, \ + SpecializationStats::record_do_oop_call##nv_suffix(SpecializationStats::ik);\ + (closure)->do_oop##nv_suffix(p), \ + assert_is_in_closed_subset) \ + return size_helper(); \ +} +#endif // !SERIALGC + #define InstanceKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix) \ \ int instanceKlass::oop_oop_iterate##nv_suffix##_m(oop obj, \ @@ -1550,9 +1569,13 @@ } ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DEFN) -ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceKlass_OOP_OOP_ITERATE_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DEFN) ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DEFN_m) -ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceKlass_OOP_OOP_ITERATE_DEFN_m) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DEFN_m) +#ifndef SERIALGC +ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN) +#endif // !SERIALGC void instanceKlass::iterate_static_fields(OopClosure* closure) { InstanceKlass_OOP_ITERATE( \ diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/instanceKlass.hpp --- a/src/share/vm/oops/instanceKlass.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/instanceKlass.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -656,13 +656,21 @@ return 
oop_oop_iterate_v_m(obj, blk, mr); } -#define InstanceKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ - int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk); \ - int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, \ +#define InstanceKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ + int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk); \ + int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, \ MemRegion mr); ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceKlass_OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DECL) + +#ifndef SERIALGC +#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ + int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk); + + ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) +#endif // !SERIALGC void iterate_static_fields(OopClosure* closure); void iterate_static_fields(OopClosure* closure, MemRegion mr); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/instanceRefKlass.cpp --- a/src/share/vm/oops/instanceRefKlass.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/instanceRefKlass.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -176,6 +176,11 @@ } #define InstanceRefKlass_SPECIALIZED_OOP_ITERATE(T, nv_suffix, contains) \ + if (closure->apply_to_weak_ref_discovered_field()) { \ + T* disc_addr = (T*)java_lang_ref_Reference::discovered_addr(obj); \ + closure->do_oop##nv_suffix(disc_addr); \ + } \ + \ T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj); \ oop referent = oopDesc::load_decode_heap_oop(referent_addr); \ if (referent != NULL && contains(referent_addr)) { \ @@ -219,6 +224,25 @@ } \ } +#ifndef SERIALGC +#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \ + \ +int instanceRefKlass:: \ +oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) { \ + /* Get size before changing pointers */ \ + SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::irk);\ + \ + int size = instanceKlass::oop_oop_iterate_backwards##nv_suffix(obj, closure); \ + \ + if (UseCompressedOops) { \ + InstanceRefKlass_SPECIALIZED_OOP_ITERATE(narrowOop, nv_suffix, contains); \ + } else { \ + InstanceRefKlass_SPECIALIZED_OOP_ITERATE(oop, nv_suffix, contains); \ + } \ +} +#endif // !SERIALGC + + #define InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix) \ \ int instanceRefKlass:: \ @@ -236,9 +260,13 @@ } ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DEFN) -ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceRefKlass_OOP_OOP_ITERATE_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DEFN) +#ifndef SERIALGC +ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN) +#endif // SERIALGC ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m) -ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m) #ifndef SERIALGC template @@ -423,7 +451,7 @@ // Verify next field oop next = java_lang_ref_Reference::next(obj); if (next != NULL) { - guarantee(next->is_oop(), "next field verify fa iled"); + guarantee(next->is_oop(), "next field verify failed"); guarantee(next->is_instanceRef(), 
"next field verify failed"); if (gch != NULL && !gch->is_in_youngest(obj)) { // We do a specific remembered set check here since the next field is diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/instanceRefKlass.hpp --- a/src/share/vm/oops/instanceRefKlass.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/instanceRefKlass.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -72,7 +72,15 @@ int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, MemRegion mr); ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceRefKlass_OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DECL) + +#ifndef SERIALGC +#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ + int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk); + + ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) +#endif // !SERIALGC static void release_and_notify_pending_list_lock(BasicLock *pending_list_basic_lock); static void acquire_pending_list_lock(BasicLock *pending_list_basic_lock); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/klass.hpp --- a/src/share/vm/oops/klass.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/klass.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -134,14 +134,14 @@ // Every subclass on which vtbl_value is called must include this macro. // Delay the installation of the klassKlass pointer until after the // the vtable for a new klass has been installed (after the call to new()). -#define DEFINE_ALLOCATE_PERMANENT(thisKlass) \ +#define DEFINE_ALLOCATE_PERMANENT(thisKlass) \ void* allocate_permanent(KlassHandle& klass_klass, int size, TRAPS) const { \ - void* result = new(klass_klass, size, THREAD) thisKlass(); \ - if (HAS_PENDING_EXCEPTION) return NULL; \ - klassOop new_klass = ((Klass*) result)->as_klassOop(); \ - OrderAccess::storestore(); \ - post_new_init_klass(klass_klass, new_klass, size); \ - return result; \ + void* result = new(klass_klass, size, THREAD) thisKlass(); \ + if (HAS_PENDING_EXCEPTION) return NULL; \ + klassOop new_klass = ((Klass*) result)->as_klassOop(); \ + OrderAccess::storestore(); \ + post_new_init_klass(klass_klass, new_klass, size); \ + return result; \ } bool null_vtbl() { return *(intptr_t*)this == 0; } @@ -694,6 +694,14 @@ return oop_oop_iterate(obj, blk); } +#ifndef SERIALGC + // In case we don't have a specialized backward scanner use forward + // iteration. + virtual int oop_oop_iterate_backwards_v(oop obj, OopClosure* blk) { + return oop_oop_iterate_v(obj, blk); + } +#endif // !SERIALGC + // Iterates "blk" over all the oops in "obj" (of type "this") within "mr". // (I don't see why the _m should be required, but without it the Solaris // C++ gives warning messages about overridings of the "oop_oop_iterate" @@ -722,7 +730,19 @@ } SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_DECL) - SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(Klass_OOP_OOP_ITERATE_DECL) + SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_DECL) + +#ifndef SERIALGC +#define Klass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ + virtual int oop_oop_iterate_backwards##nv_suffix(oop obj, \ + OopClosureType* blk) { \ + /* Default implementation reverts to general version. 
*/ \ + return oop_oop_iterate_backwards_v(obj, blk); \ + } + + SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_BACKWARDS_DECL) + SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_BACKWARDS_DECL) +#endif // !SERIALGC virtual void array_klasses_do(void f(klassOop k)) {} virtual void with_array_klasses_do(void f(klassOop k)); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/markOop.hpp --- a/src/share/vm/oops/markOop.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/markOop.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -216,11 +216,7 @@ static markOop INFLATING() { return (markOop) 0; } // inflate-in-progress // Should this header be preserved during GC? - bool must_be_preserved(oop obj_containing_mark) const { - if (!UseBiasedLocking) - return (!is_unlocked() || !has_no_hash()); - return must_be_preserved_with_bias(obj_containing_mark); - } + inline bool must_be_preserved(oop obj_containing_mark) const; inline bool must_be_preserved_with_bias(oop obj_containing_mark) const; // Should this header (including its age bits) be preserved in the @@ -240,22 +236,14 @@ // observation is that promotion failures are quite rare and // reducing the number of mark words preserved during them isn't a // high priority. - bool must_be_preserved_for_promotion_failure(oop obj_containing_mark) const { - if (!UseBiasedLocking) - return (this != prototype()); - return must_be_preserved_with_bias_for_promotion_failure(obj_containing_mark); - } + inline bool must_be_preserved_for_promotion_failure(oop obj_containing_mark) const; inline bool must_be_preserved_with_bias_for_promotion_failure(oop obj_containing_mark) const; // Should this header be preserved during a scavenge where CMS is // the old generation? // (This is basically the same body as must_be_preserved_for_promotion_failure(), // but takes the klassOop as argument instead) - bool must_be_preserved_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const { - if (!UseBiasedLocking) - return (this != prototype()); - return must_be_preserved_with_bias_for_cms_scavenge(klass_of_obj_containing_mark); - } + inline bool must_be_preserved_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const; inline bool must_be_preserved_with_bias_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const; // WARNING: The following routines are used EXCLUSIVELY by diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/markOop.inline.hpp --- a/src/share/vm/oops/markOop.inline.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/markOop.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -39,6 +39,12 @@ return (!is_unlocked() || !has_no_hash()); } +inline bool markOopDesc::must_be_preserved(oop obj_containing_mark) const { + if (!UseBiasedLocking) + return (!is_unlocked() || !has_no_hash()); + return must_be_preserved_with_bias(obj_containing_mark); +} + // Should this header (including its age bits) be preserved in the // case of a promotion failure during scavenge? inline bool markOopDesc::must_be_preserved_with_bias_for_promotion_failure(oop obj_containing_mark) const { @@ -59,6 +65,13 @@ return (this != prototype()); } +inline bool markOopDesc::must_be_preserved_for_promotion_failure(oop obj_containing_mark) const { + if (!UseBiasedLocking) + return (this != prototype()); + return must_be_preserved_with_bias_for_promotion_failure(obj_containing_mark); +} + + // Should this header (including its age bits) be preserved in the // case of a scavenge in which CMS is the old generation? 
inline bool markOopDesc::must_be_preserved_with_bias_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const { @@ -70,6 +83,11 @@ } return (this != prototype()); } +inline bool markOopDesc::must_be_preserved_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const { + if (!UseBiasedLocking) + return (this != prototype()); + return must_be_preserved_with_bias_for_cms_scavenge(klass_of_obj_containing_mark); +} inline markOop markOopDesc::prototype_for_object(oop obj) { #ifdef ASSERT diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/objArrayKlass.cpp --- a/src/share/vm/oops/objArrayKlass.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/objArrayKlass.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -86,14 +86,18 @@ const size_t word_len = objArrayOopDesc::array_size(length); - // For performance reasons, we assume we are using a card marking write - // barrier. The assert will fail if this is not the case. BarrierSet* bs = Universe::heap()->barrier_set(); + // For performance reasons, we assume we are that the write barrier we + // are using has optimized modes for arrays of references. At least one + // of the asserts below will fail if this is not the case. assert(bs->has_write_ref_array_opt(), "Barrier set must have ref array opt"); + assert(bs->has_write_ref_array_pre_opt(), "For pre-barrier as well."); + MemRegion dst_mr = MemRegion((HeapWord*)dst, word_len); if (s == d) { // since source and destination are equal we do not need conversion checks. assert(length > 0, "sanity check"); + bs->write_ref_array_pre(dst_mr); Copy::conjoint_oops_atomic(src, dst, length); } else { // We have to make sure all elements conform to the destination array @@ -101,6 +105,7 @@ klassOop stype = objArrayKlass::cast(s->klass())->element_klass(); if (stype == bound || Klass::cast(stype)->is_subtype_of(bound)) { // elements are guaranteed to be subtypes, so no check necessary + bs->write_ref_array_pre(dst_mr); Copy::conjoint_oops_atomic(src, dst, length); } else { // slow case: need individual subtype checks @@ -110,8 +115,13 @@ for (T* p = dst; from < end; from++, p++) { // XXX this is going to be slow. T element = *from; - if (oopDesc::is_null(element) || - Klass::cast(oopDesc::decode_heap_oop_not_null(element)->klass())->is_subtype_of(bound)) { + // even slower now + bool element_is_null = oopDesc::is_null(element); + oop new_val = element_is_null ? oop(NULL) + : oopDesc::decode_heap_oop_not_null(element); + if (element_is_null || + Klass::cast((new_val->klass()))->is_subtype_of(bound)) { + bs->write_ref_field_pre(p, new_val); *p = *from; } else { // We must do a barrier to cover the partial copy. 
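The objArrayKlass::do_copy hunk above issues the barrier set's pre-write barrier before destination slots are overwritten: write_ref_array_pre over the whole region on the fast paths, and write_ref_field_pre per slot in the checked slow path. A simplified sketch of that slow-path shape, with a toy log standing in for the SATB machinery and an integer type id standing in for the real subtype test:

// Sketch of a checked reference-array copy with a per-slot pre-write barrier.
// Types and hooks are illustrative only; this models the shape of the patched
// slow path, not the HotSpot API.
#include <cstddef>
#include <cstdio>
#include <vector>

struct Obj { int type_id; };

// Stand-in SATB log: old referents are recorded before being overwritten.
static std::vector<Obj*> satb_log;

static void pre_write_barrier(Obj** slot) {
  if (*slot != nullptr) satb_log.push_back(*slot);
}

// Copies src[0..len) into dst[0..len); elements must satisfy type_id <= bound.
// Returns how many elements were copied before a check failed.
static size_t checked_oop_copy(Obj** src, Obj** dst, size_t len, int bound) {
  for (size_t i = 0; i < len; i++) {
    Obj* elem = src[i];
    if (elem == nullptr || elem->type_id <= bound) {
      pre_write_barrier(&dst[i]);  // log the slot's old value first
      dst[i] = elem;               // then perform the store
    } else {
      return i;                    // caller must still cover the partial copy
    }
  }
  return len;
}

int main() {
  Obj a{1}, b{2}, c{9}, old0{5}, old1{6};
  Obj* src[3] = {&a, &b, &c};
  Obj* dst[3] = {&old0, &old1, nullptr};
  size_t copied = checked_oop_copy(src, dst, 3, 2);
  std::printf("copied %zu element(s), logged %zu old referent(s)\n",
              copied, satb_log.size());
  return 0;
}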
@@ -401,11 +411,11 @@ } ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DEFN) -ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DEFN) ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m) -ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m) +ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m) ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r) -ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r) +ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r) int objArrayKlass::oop_adjust_pointers(oop obj) { assert(obj->is_objArray(), "obj must be obj array"); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/objArrayKlass.hpp --- a/src/share/vm/oops/objArrayKlass.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/objArrayKlass.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -111,7 +111,7 @@ int start, int end); ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DECL) // JVM support jint compute_modifier_flags(TRAPS) const; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/objArrayOop.cpp --- a/src/share/vm/oops/objArrayOop.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/objArrayOop.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -33,4 +33,4 @@ } ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayOop_OOP_ITERATE_DEFN) -ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayOop_OOP_ITERATE_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayOop_OOP_ITERATE_DEFN) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/objArrayOop.hpp --- a/src/share/vm/oops/objArrayOop.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/objArrayOop.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -29,6 +29,8 @@ friend class objArrayKlass; friend class Runtime1; friend class psPromotionManager; + friend class CSMarkOopClosure; + friend class G1ParScanPartialArrayClosure; template T* obj_at_addr(int index) const { assert(is_within_bounds(index), "index out of bounds"); @@ -88,5 +90,5 @@ int oop_iterate_range(OopClosureType* blk, int start, int end); ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayOop_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayOop_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayOop_OOP_ITERATE_DECL) }; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/oop.hpp --- a/src/share/vm/oops/oop.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/oop.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -358,12 +358,21 @@ static void set_bs(BarrierSet* bs) { _bs = bs; } // iterators, returns size of object -#define OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ +#define OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ int oop_iterate(OopClosureType* blk); \ int oop_iterate(OopClosureType* blk, MemRegion mr); // Only in mr. 
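The oop.hpp declarations above come from applying a declaration macro to the ALL_OOP_OOP_ITERATE_CLOSURES_1 and _2 lists, which expand it once per specialized closure; the renamed _2 list and the new backwards-iteration declarations reuse the same machinery. A minimal illustration of that X-macro technique, with invented closure names rather than the real closure set:

// Tiny illustration of the "apply a macro to every closure" pattern behind
// ALL_OOP_OOP_ITERATE_CLOSURES_*; the closure names here are invented.
#include <cstdio>

struct ScanClosureA { void apply(int* p) { *p += 1; } };
struct ScanClosureB { void apply(int* p) { *p += 2; } };

// The "list" macro: applies f to each (ClosureType, suffix) pair.
#define ALL_DEMO_CLOSURES(f) \
  f(ScanClosureA, _a)        \
  f(ScanClosureB, _b)

// A "definition" macro instantiated once per closure; this generates
// iterate_a(ScanClosureA*, ...) and iterate_b(ScanClosureB*, ...).
#define DEMO_ITERATE_DEFN(ClosureType, suffix)              \
  int iterate##suffix(ClosureType* blk, int* data, int n) { \
    for (int i = 0; i < n; i++) blk->apply(&data[i]);       \
    return n;                                               \
  }

ALL_DEMO_CLOSURES(DEMO_ITERATE_DEFN)

int main() {
  int data[3] = {0, 0, 0};
  ScanClosureA a;
  ScanClosureB b;
  iterate_a(&a, data, 3);
  iterate_b(&b, data, 3);
  std::printf("%d %d %d\n", data[0], data[1], data[2]);  // prints: 3 3 3
  return 0;
}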
ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_3(OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_DECL) + +#ifndef SERIALGC + +#define OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ + int oop_iterate_backwards(OopClosureType* blk); + + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_BACKWARDS_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_BACKWARDS_DECL) +#endif void oop_iterate_header(OopClosure* blk); void oop_iterate_header(OopClosure* blk, MemRegion mr); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/oop.inline.hpp --- a/src/share/vm/oops/oop.inline.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/oop.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -380,10 +380,11 @@ s = (int)((size_t)round_to(size_in_bytes, MinObjAlignmentInBytes) / HeapWordSize); - // UseParNewGC can change the length field of an "old copy" of an object - // array in the young gen so it indicates the stealable portion of - // an already copied array. This will cause the first disjunct below - // to fail if the sizes are computed across such a concurrent change. + // UseParNewGC, UseParallelGC and UseG1GC can change the length field + // of an "old copy" of an object array in the young gen so it indicates + // the grey portion of an already copied array. This will cause the first + // disjunct below to fail if the two comparands are computed across such + // a concurrent change. // UseParNewGC also runs with promotion labs (which look like int // filler arrays) which are subject to changing their declared size // when finally retiring a PLAB; this also can cause the first disjunct @@ -393,13 +394,11 @@ // is_objArray() && is_forwarded() // covers first scenario above // || is_typeArray() // covers second scenario above // If and when UseParallelGC uses the same obj array oop stealing/chunking - // technique, or when G1 is integrated (and currently uses this array chunking - // technique) we will need to suitably modify the assertion. + // technique, we will need to suitably modify the assertion. assert((s == klass->oop_size(this)) || - (((UseParNewGC || UseParallelGC) && - Universe::heap()->is_gc_active()) && - (is_typeArray() || - (is_objArray() && is_forwarded()))), + (Universe::heap()->is_gc_active() && + ((is_typeArray() && UseParNewGC) || + (is_objArray() && is_forwarded() && (UseParNewGC || UseParallelGC || UseG1GC)))), "wrong array object size"); } else { // Must be zero, so bite the bullet and take the virtual call. @@ -426,16 +425,22 @@ oopDesc::bs()->write_ref_field(p, v); } +inline void update_barrier_set_pre(void* p, oop v) { + oopDesc::bs()->write_ref_field_pre(p, v); +} + template inline void oop_store(T* p, oop v) { if (always_do_update_barrier) { oop_store((volatile T*)p, v); } else { + update_barrier_set_pre(p, v); oopDesc::encode_store_heap_oop(p, v); update_barrier_set(p, v); } } template inline void oop_store(volatile T* p, oop v) { + update_barrier_set_pre((void*)p, v); // Used by release_obj_field_put, so use release_store_ptr. 
oopDesc::release_encode_store_heap_oop(p, v); update_barrier_set((void*)p, v); @@ -683,8 +688,19 @@ } ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_DEFN) -ALL_OOP_OOP_ITERATE_CLOSURES_3(OOP_ITERATE_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_DEFN) +#ifndef SERIALGC +#define OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \ + \ +inline int oopDesc::oop_iterate_backwards(OopClosureType* blk) { \ + SpecializationStats::record_call(); \ + return blueprint()->oop_oop_iterate_backwards##nv_suffix(this, blk); \ +} + +ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_BACKWARDS_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_BACKWARDS_DEFN) +#endif // !SERIALGC inline bool oopDesc::is_shared() const { return CompactingPermGenGen::is_shared(this); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/opto/graphKit.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1180,6 +1180,12 @@ else reason = Deoptimization::Reason_div0_check; + // %%% Since Reason_unhandled is not recorded on a per-bytecode basis, + // ciMethodData::has_trap_at will return a conservative -1 if any + // must-be-null assertion has failed. This could cause performance + // problems for a method after its first do_null_assert failure. + // Consider using 'Reason_class_check' instead? + // To cause an implicit null check, we set the not-null probability // to the maximum (PROB_MAX). For an explicit check the probablity // is set to a smaller value. @@ -1366,6 +1372,10 @@ BarrierSet* bs = Universe::heap()->barrier_set(); set_control(ctl); switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + g1_write_barrier_pre(obj, adr, adr_idx, val, val_type, bt); + break; case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: @@ -1390,6 +1400,10 @@ BarrierSet* bs = Universe::heap()->barrier_set(); set_control(ctl); switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise); + break; case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: @@ -3176,3 +3190,251 @@ } return NULL; } + +void GraphKit::g1_write_barrier_pre(Node* obj, + Node* adr, + uint alias_idx, + Node* val, + const Type* val_type, + BasicType bt) { + IdealKit ideal(gvn(), control(), merged_memory(), true); +#define __ ideal. + __ declares_done(); + + Node* thread = __ thread(); + + Node* no_ctrl = NULL; + Node* no_base = __ top(); + Node* zero = __ ConI(0); + + float likely = PROB_LIKELY(0.999); + float unlikely = PROB_UNLIKELY(0.999); + + BasicType active_type = in_bytes(PtrQueue::byte_width_of_active()) == 4 ? 
T_INT : T_BYTE; + assert(in_bytes(PtrQueue::byte_width_of_active()) == 4 || in_bytes(PtrQueue::byte_width_of_active()) == 1, "flag width"); + + // Offsets into the thread + const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() + // 648 + PtrQueue::byte_offset_of_active()); + const int index_offset = in_bytes(JavaThread::satb_mark_queue_offset() + // 656 + PtrQueue::byte_offset_of_index()); + const int buffer_offset = in_bytes(JavaThread::satb_mark_queue_offset() + // 652 + PtrQueue::byte_offset_of_buf()); + // Now the actual pointers into the thread + + // set_control( ctl); + + Node* marking_adr = __ AddP(no_base, thread, __ ConX(marking_offset)); + Node* buffer_adr = __ AddP(no_base, thread, __ ConX(buffer_offset)); + Node* index_adr = __ AddP(no_base, thread, __ ConX(index_offset)); + + // Now some of the values + + Node* marking = __ load(no_ctrl, marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw); + Node* index = __ load(no_ctrl, index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw); + Node* buffer = __ load(no_ctrl, buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); + + // if (!marking) + __ if_then(marking, BoolTest::ne, zero); { + + const Type* t1 = adr->bottom_type(); + const Type* t2 = val->bottom_type(); + + Node* orig = __ load(no_ctrl, adr, val_type, bt, alias_idx); + // if (orig != NULL) + __ if_then(orig, BoolTest::ne, null()); { + + // load original value + // alias_idx correct?? + + // is the queue for this thread full? + __ if_then(index, BoolTest::ne, zero, likely); { + + // decrement the index + Node* next_index = __ SubI(index, __ ConI(sizeof(intptr_t))); + Node* next_indexX = next_index; +#ifdef _LP64 + // We could refine the type for what it's worth + // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue); + next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) ); +#endif // _LP64 + + // Now get the buffer location we will log the original value into and store it + + Node *log_addr = __ AddP(no_base, buffer, next_indexX); + // __ store(__ ctrl(), log_addr, orig, T_OBJECT, C->get_alias_index(TypeOopPtr::BOTTOM)); + __ store(__ ctrl(), log_addr, orig, T_OBJECT, Compile::AliasIdxRaw); + + + // update the index + // __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw); + // This is a hack to force this store to occur before the oop store that is coming up + __ store(__ ctrl(), index_adr, next_index, T_INT, C->get_alias_index(TypeOopPtr::BOTTOM)); + + } __ else_(); { + + // logging buffer is full, call the runtime + const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type(); + // __ make_leaf_call(tf, OptoRuntime::g1_wb_pre_Java(), "g1_wb_pre", orig, thread); + __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", orig, thread); + } __ end_if(); + } __ end_if(); + } __ end_if(); + + __ drain_delay_transform(); + set_control( __ ctrl()); + set_all_memory( __ merged_memory()); + +#undef __ +} + +// +// Update the card table and add card address to the queue +// +void GraphKit::g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store, Node* index, Node* index_adr, Node* buffer, const TypeFunc* tf) { +#define __ ideal-> + Node* zero = __ ConI(0); + Node* no_base = __ top(); + BasicType card_bt = T_BYTE; + // Smash zero into card. 
MUST BE ORDERED WRT TO STORE + __ storeCM(__ ctrl(), card_adr, zero, store, card_bt, Compile::AliasIdxRaw); + + // Now do the queue work + __ if_then(index, BoolTest::ne, zero); { + + Node* next_index = __ SubI(index, __ ConI(sizeof(intptr_t))); + Node* next_indexX = next_index; +#ifdef _LP64 + // We could refine the type for what it's worth + // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue); + next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) ); +#endif // _LP64 + Node* log_addr = __ AddP(no_base, buffer, next_indexX); + + __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw); + __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw); + + } __ else_(); { + __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread()); + } __ end_if(); +#undef __ +} + +void GraphKit::g1_write_barrier_post(Node* store, + Node* obj, + Node* adr, + uint alias_idx, + Node* val, + BasicType bt, + bool use_precise) { + // If we are writing a NULL then we need no post barrier + + if (val != NULL && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) { + // Must be NULL + const Type* t = val->bottom_type(); + assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be NULL"); + // No post barrier if writing NULLx + return; + } + + if (!use_precise) { + // All card marks for a (non-array) instance are in one place: + adr = obj; + } + // (Else it's an array (or unknown), and we want more precise card marks.) + assert(adr != NULL, ""); + + IdealKit ideal(gvn(), control(), merged_memory(), true); +#define __ ideal. + __ declares_done(); + + Node* thread = __ thread(); + + Node* no_ctrl = NULL; + Node* no_base = __ top(); + float likely = PROB_LIKELY(0.999); + float unlikely = PROB_UNLIKELY(0.999); + Node* zero = __ ConI(0); + Node* zeroX = __ ConX(0); + + // Get the alias_index for raw card-mark memory + const TypePtr* card_type = TypeRawPtr::BOTTOM; + + const TypeFunc *tf = OptoRuntime::g1_wb_post_Type(); + + // Get the address of the card table + CardTableModRefBS* ct = + (CardTableModRefBS*)(Universe::heap()->barrier_set()); + Node *card_table = __ makecon(TypeRawPtr::make((address)ct->byte_map_base)); + // Get base of card map + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + + // Offsets into the thread + const int index_offset = in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index()); + const int buffer_offset = in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf()); + + // Pointers into the thread + + Node* buffer_adr = __ AddP(no_base, thread, __ ConX(buffer_offset)); + Node* index_adr = __ AddP(no_base, thread, __ ConX(index_offset)); + + // Now some values + + Node* index = __ load(no_ctrl, index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw); + Node* buffer = __ load(no_ctrl, buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); + + + // Convert the store obj pointer to an int prior to doing math on it + // Use addr not obj gets accurate card marks + + // Node* cast = __ CastPX(no_ctrl, adr /* obj */); + + // Must use ctrl to prevent "integerized oop" existing across safepoint + Node* cast = __ CastPX(__ ctrl(), ( use_precise ? 
adr : obj )); + + // Divide pointer by card size + Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) ); + + // Combine card table base and card offset + Node *card_adr = __ AddP(no_base, card_table, card_offset ); + + // If we know the value being stored does it cross regions? + + if (val != NULL) { + // Does the store cause us to cross regions? + + // Should be able to do an unsigned compare of region_size instead of + // and extra shift. Do we have an unsigned compare?? + // Node* region_size = __ ConI(1 << HeapRegion::LogOfHRGrainBytes); + Node* xor_res = __ URShiftX ( __ XorX( cast, __ CastPX(__ ctrl(), val)), __ ConI(HeapRegion::LogOfHRGrainBytes)); + + // if (xor_res == 0) same region so skip + __ if_then(xor_res, BoolTest::ne, zeroX); { + + // No barrier if we are storing a NULL + __ if_then(val, BoolTest::ne, null(), unlikely); { + + // Ok must mark the card if not already dirty + + // load the original value of the card + Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); + + __ if_then(card_val, BoolTest::ne, zero); { + g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf); + } __ end_if(); + } __ end_if(); + } __ end_if(); + } else { + g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf); + } + + + __ drain_delay_transform(); + set_control( __ ctrl()); + set_all_memory( __ merged_memory()); +#undef __ + +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/opto/graphKit.hpp --- a/src/share/vm/opto/graphKit.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/opto/graphKit.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -24,6 +24,7 @@ class FastLockNode; class FastUnlockNode; +class IdealKit; class Parse; class RootNode; @@ -581,6 +582,27 @@ && Universe::heap()->can_elide_tlab_store_barriers()); } + // G1 pre/post barriers + void g1_write_barrier_pre(Node* obj, + Node* adr, + uint alias_idx, + Node* val, + const Type* val_type, + BasicType bt); + + void g1_write_barrier_post(Node* store, + Node* obj, + Node* adr, + uint alias_idx, + Node* val, + BasicType bt, + bool use_precise); + // Helper function for g1 + private: + void g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store, Node* index, Node* index_adr, + Node* buffer, const TypeFunc* tf); + + public: // Helper function to round double arguments before a call void round_double_arguments(ciMethod* dest_method); void round_double_result(ciMethod* dest_method); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/opto/lcm.cpp --- a/src/share/vm/opto/lcm.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/opto/lcm.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -585,7 +585,7 @@ // A few node types require changing a required edge to a precedence edge // before allocation. - if( UseConcMarkSweepGC ) { + if( UseConcMarkSweepGC || UseG1GC ) { if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) { // Note: Required edges with an index greater than oper_input_base // are not supported by the allocator. diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/opto/macro.cpp --- a/src/share/vm/opto/macro.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/opto/macro.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -923,21 +923,7 @@ Node* eden_end_adr; set_eden_pointers(eden_top_adr, eden_end_adr); - uint raw_idx = C->get_alias_index(TypeRawPtr::BOTTOM); assert(ctrl != NULL, "must have control"); - - // Load Eden::end. Loop invariant and hoisted. 
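The generated g1_write_barrier_post above filters out stores that cannot create a cross-region reference before it dirties a card and enqueues the card address: NULL stores, stores within one heap region, and cards that are already dirty are all skipped. A compact model of those filters, with made-up region and card sizes and a plain vector standing in for the dirty card queue:

// Sketch of the post-barrier filtering: skip NULL stores, skip same-region
// stores, skip already-dirty cards, otherwise dirty the card and enqueue it.
// Region size, card size and the queue are illustrative stand-ins.
#include <cstdint>
#include <cstdio>
#include <vector>

static const int kLogRegionBytes = 20;        // pretend 1 MB heap regions
static const int kCardShift      = 9;         // 512-byte cards
static const uint8_t kDirty = 0;              // zero, like the storeCM of zero above
static const uint8_t kClean = 1;

static uint8_t card_table[1 << 16];
static std::vector<uintptr_t> dirty_card_queue;

static void post_barrier(const void* field_addr, const void* new_val) {
  if (new_val == nullptr) return;                  // storing NULL: no barrier
  uintptr_t f = (uintptr_t)field_addr;
  uintptr_t v = (uintptr_t)new_val;
  if (((f ^ v) >> kLogRegionBytes) == 0) return;   // same region: skip
  uintptr_t card = (f >> kCardShift) % (1 << 16);
  if (card_table[card] == kDirty) return;          // already dirty: skip
  card_table[card] = kDirty;                       // mark the card
  dirty_card_queue.push_back(card);                // hand it to refinement
}

int main() {
  for (int i = 0; i < (1 << 16); i++) card_table[i] = kClean;
  // Field and value in different pretend regions: the card gets enqueued.
  post_barrier((void*)(uintptr_t)0x100200, (void*)(uintptr_t)0x300400);
  // Field and value in the same pretend region: filtered out.
  post_barrier((void*)(uintptr_t)0x100200, (void*)(uintptr_t)0x100800);
  std::printf("cards enqueued: %zu\n", dirty_card_queue.size());  // prints: 1
  return 0;
}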
- // - // Note: We set the control input on "eden_end" and "old_eden_top" when using - // a TLAB to work around a bug where these values were being moved across - // a safepoint. These are not oops, so they cannot be include in the oop - // map, but the can be changed by a GC. The proper way to fix this would - // be to set the raw memory state when generating a SafepointNode. However - // this will require extensive changes to the loop optimization in order to - // prevent a degradation of the optimization. - // See comment in memnode.hpp, around line 227 in class LoadPNode. - Node* eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS); - // We need a Region and corresponding Phi's to merge the slow-path and fast-path results. // they will not be used if "always_slow" is set enum { slow_result_path = 1, fast_result_path = 2 }; @@ -957,12 +943,15 @@ initial_slow_test = BoolNode::make_predicate(initial_slow_test, &_igvn); } - if (DTraceAllocProbes) { + if (DTraceAllocProbes || + !UseTLAB && (!Universe::heap()->supports_inline_contig_alloc() || + (UseConcMarkSweepGC && CMSIncrementalMode))) { // Force slow-path allocation always_slow = true; initial_slow_test = NULL; } + enum { too_big_or_final_path = 1, need_gc_path = 2 }; Node *slow_region = NULL; Node *toobig_false = ctrl; @@ -991,6 +980,23 @@ Node *slow_mem = mem; // save the current memory state for slow path // generate the fast allocation code unless we know that the initial test will always go slow if (!always_slow) { + Node* eden_top_adr; + Node* eden_end_adr; + + set_eden_pointers(eden_top_adr, eden_end_adr); + + // Load Eden::end. Loop invariant and hoisted. + // + // Note: We set the control input on "eden_end" and "old_eden_top" when using + // a TLAB to work around a bug where these values were being moved across + // a safepoint. These are not oops, so they cannot be include in the oop + // map, but the can be changed by a GC. The proper way to fix this would + // be to set the raw memory state when generating a SafepointNode. However + // this will require extensive changes to the loop optimization in order to + // prevent a degradation of the optimization. + // See comment in memnode.hpp, around line 227 in class LoadPNode. 
+ Node *eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS); + // allocate the Region and Phi nodes for the result result_region = new (C, 3) RegionNode(3); result_phi_rawmem = new (C, 3) PhiNode( result_region, Type::MEMORY, TypeRawPtr::BOTTOM ); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/opto/runtime.cpp --- a/src/share/vm/opto/runtime.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/opto/runtime.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -44,6 +44,8 @@ address OptoRuntime::_multianewarray3_Java = NULL; address OptoRuntime::_multianewarray4_Java = NULL; address OptoRuntime::_multianewarray5_Java = NULL; +address OptoRuntime::_g1_wb_pre_Java = NULL; +address OptoRuntime::_g1_wb_post_Java = NULL; address OptoRuntime::_vtable_must_compile_Java = NULL; address OptoRuntime::_complete_monitor_locking_Java = NULL; address OptoRuntime::_rethrow_Java = NULL; @@ -89,6 +91,8 @@ gen(env, _multianewarray3_Java , multianewarray3_Type , multianewarray3_C , 0 , true , false, false); gen(env, _multianewarray4_Java , multianewarray4_Type , multianewarray4_C , 0 , true , false, false); gen(env, _multianewarray5_Java , multianewarray5_Type , multianewarray5_C , 0 , true , false, false); + gen(env, _g1_wb_pre_Java , g1_wb_pre_Type , SharedRuntime::g1_wb_pre , 0 , false, false, false); + gen(env, _g1_wb_post_Java , g1_wb_post_Type , SharedRuntime::g1_wb_post , 0 , false, false, false); gen(env, _complete_monitor_locking_Java , complete_monitor_enter_Type , SharedRuntime::complete_monitor_locking_C , 0 , false, false, false); gen(env, _rethrow_Java , rethrow_Type , rethrow_C , 2 , true , false, true ); @@ -385,6 +389,33 @@ return multianewarray_Type(5); } +const TypeFunc *OptoRuntime::g1_wb_pre_Type() { + const Type **fields = TypeTuple::fields(2); + fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value + fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread + const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields); + + // create result type (range) + fields = TypeTuple::fields(0); + const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields); + + return TypeFunc::make(domain, range); +} + +const TypeFunc *OptoRuntime::g1_wb_post_Type() { + + const Type **fields = TypeTuple::fields(2); + fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Card addr + fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread + const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields); + + // create result type (range) + fields = TypeTuple::fields(0); + const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields); + + return TypeFunc::make(domain, range); +} + const TypeFunc *OptoRuntime::uncommon_trap_Type() { // create input type (domain) const Type **fields = TypeTuple::fields(1); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/opto/runtime.hpp --- a/src/share/vm/opto/runtime.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/opto/runtime.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -108,6 +108,8 @@ static address _multianewarray3_Java; static address _multianewarray4_Java; static address _multianewarray5_Java; + static address _g1_wb_pre_Java; + static address _g1_wb_post_Java; static address _vtable_must_compile_Java; static address _complete_monitor_locking_Java; static address _rethrow_Java; @@ -140,6 +142,8 @@ static void multianewarray3_C(klassOopDesc* klass, int len1, int len2, int len3, JavaThread *thread); static void multianewarray4_C(klassOopDesc* klass, int len1, int len2, int len3, int len4, JavaThread *thread); static 
void multianewarray5_C(klassOopDesc* klass, int len1, int len2, int len3, int len4, int len5, JavaThread *thread); + static void g1_wb_pre_C(oopDesc* orig, JavaThread* thread); + static void g1_wb_post_C(void* card_addr, JavaThread* thread); public: // Slow-path Locking and Unlocking @@ -195,6 +199,8 @@ static address multianewarray3_Java() { return _multianewarray3_Java; } static address multianewarray4_Java() { return _multianewarray4_Java; } static address multianewarray5_Java() { return _multianewarray5_Java; } + static address g1_wb_pre_Java() { return _g1_wb_pre_Java; } + static address g1_wb_post_Java() { return _g1_wb_post_Java; } static address vtable_must_compile_stub() { return _vtable_must_compile_Java; } static address complete_monitor_locking_Java() { return _complete_monitor_locking_Java; } @@ -232,6 +238,8 @@ static const TypeFunc* multianewarray3_Type(); // multianewarray static const TypeFunc* multianewarray4_Type(); // multianewarray static const TypeFunc* multianewarray5_Type(); // multianewarray + static const TypeFunc* g1_wb_pre_Type(); + static const TypeFunc* g1_wb_post_Type(); static const TypeFunc* complete_monitor_enter_Type(); static const TypeFunc* complete_monitor_exit_Type(); static const TypeFunc* uncommon_trap_Type(); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/prims/jvm.cpp --- a/src/share/vm/prims/jvm.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/prims/jvm.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -377,7 +377,11 @@ JVM_ENTRY_NO_ENV(jlong, JVM_FreeMemory(void)) JVMWrapper("JVM_FreeMemory"); CollectedHeap* ch = Universe::heap(); - size_t n = ch->capacity() - ch->used(); + size_t n; + { + MutexLocker x(Heap_lock); + n = ch->capacity() - ch->used(); + } return convert_size_t_to_jlong(n); JVM_END diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/prims/jvmtiExport.cpp --- a/src/share/vm/prims/jvmtiExport.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/prims/jvmtiExport.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -2433,18 +2433,7 @@ // so we record the number of collections so that it can be checked in // the destructor. if (!_full) { - if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) { - GenCollectedHeap* gch = GenCollectedHeap::heap(); - assert(gch->n_gens() == 2, "configuration not recognized"); - _invocation_count = (unsigned int)gch->get_gen(1)->stat_record()->invocations; - } else { -#ifndef SERIALGC - assert(Universe::heap()->kind() == CollectedHeap::ParallelScavengeHeap, "checking"); - _invocation_count = PSMarkSweep::total_invocations(); -#else // SERIALGC - fatal("SerialGC only supported in this configuration."); -#endif // SERIALGC - } + _invocation_count = Universe::heap()->total_full_collections(); } // Do clean up tasks that need to be done at a safepoint @@ -2466,20 +2455,7 @@ // generation but could have ended up doing a "full" GC - check the // GC count to see. 
if (!_full) { - if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) { - GenCollectedHeap* gch = GenCollectedHeap::heap(); - if (_invocation_count != (unsigned int)gch->get_gen(1)->stat_record()->invocations) { - _full = true; - } - } else { -#ifndef SERIALGC - if (_invocation_count != PSMarkSweep::total_invocations()) { - _full = true; - } -#else // SERIALGC - fatal("SerialGC only supported in this configuration."); -#endif // SERIALGC - } + _full = (_invocation_count != Universe::heap()->total_full_collections()); } // Full collection probably means the perm generation has been GC'ed diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/prims/jvmtiTagMap.cpp --- a/src/share/vm/prims/jvmtiTagMap.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/prims/jvmtiTagMap.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -400,16 +400,28 @@ // get the memory region used for the young generation void JvmtiTagMap::get_young_generation() { - if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) { - GenCollectedHeap* gch = GenCollectedHeap::heap(); - _young_gen = gch->get_gen(0)->reserved(); - } else { + CollectedHeap* ch = Universe::heap(); + switch (ch->kind()) { + case (CollectedHeap::GenCollectedHeap): { + _young_gen = ((GenCollectedHeap*)ch)->get_gen(0)->reserved(); + break; + } #ifndef SERIALGC - ParallelScavengeHeap* psh = ParallelScavengeHeap::heap(); - _young_gen= psh->young_gen()->reserved(); -#else // SERIALGC - fatal("SerialGC only supported in this configuration."); -#endif // SERIALGC + case (CollectedHeap::ParallelScavengeHeap): { + _young_gen = ((ParallelScavengeHeap*)ch)->young_gen()->reserved(); + break; + } + case (CollectedHeap::G1CollectedHeap): { + // Until a more satisfactory solution is implemented, all + // oops in the tag map will require rehash at each gc. + // This is a correct, if extremely inefficient solution. + // See RFE 6621729 for related commentary. + _young_gen = ch->reserved_region(); + break; + } +#endif // !SERIALGC + default: + ShouldNotReachHere(); } } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/prims/unsafe.cpp --- a/src/share/vm/prims/unsafe.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/prims/unsafe.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -891,6 +891,7 @@ oop e = JNIHandles::resolve(e_h); oop p = JNIHandles::resolve(obj); HeapWord* addr = (HeapWord *)index_oop_from_field_offset_long(p, offset); + update_barrier_set_pre((void*)addr, e); oop res = oopDesc::atomic_compare_exchange_oop(x, addr, e); jboolean success = (res == e); if (success) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/aprofiler.hpp --- a/src/share/vm/runtime/aprofiler.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/aprofiler.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -33,6 +33,7 @@ class AllocationProfiler: AllStatic { friend class GenCollectedHeap; + friend class G1CollectedHeap; friend class MarkSweep; private: static bool _active; // tells whether profiler is active diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/arguments.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -922,18 +922,17 @@ // UseParNewGC and not explicitly set ParallelGCThreads we // set it, unless this is a single cpu machine. 
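The jvmtiExport hunks above drop the per-collector special cases and simply compare the heap's total_full_collections() counter before and after the safepoint operation, which works unchanged for G1. A small sketch of that pattern, using illustrative class names rather than the real CollectedHeap hierarchy:

// Sketch of detecting "did a full GC happen?" through one virtual counter on
// a common heap interface instead of per-collector queries. Names are invented.
#include <cstdio>

class DemoHeap {
 public:
  virtual ~DemoHeap() {}
  virtual unsigned total_full_collections() const = 0;
};

class DemoGenHeap : public DemoHeap {
  unsigned _full_gcs;
 public:
  DemoGenHeap() : _full_gcs(0) {}
  void do_full_gc() { _full_gcs++; }
  unsigned total_full_collections() const override { return _full_gcs; }
};

// Scoped helper: records the count on entry and compares on request,
// without caring which collector backs the heap.
class FullGcDetector {
  const DemoHeap& _heap;
  unsigned _count_at_start;
 public:
  explicit FullGcDetector(const DemoHeap& h)
      : _heap(h), _count_at_start(h.total_full_collections()) {}
  bool full_gc_happened() const {
    return _heap.total_full_collections() != _count_at_start;
  }
};

int main() {
  DemoGenHeap heap;
  FullGcDetector detector(heap);
  heap.do_full_gc();
  std::printf("full gc happened: %s\n",
              detector.full_gc_happened() ? "yes" : "no");  // prints: yes
  return 0;
}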
void Arguments::set_parnew_gc_flags() { - assert(!UseSerialGC && !UseParallelGC, "control point invariant"); + assert(!UseSerialGC && !UseParallelGC && !UseG1GC, + "control point invariant"); + assert(UseParNewGC, "Error"); // Turn off AdaptiveSizePolicy by default for parnew until it is // complete. - if (UseParNewGC && - FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) { + if (FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) { FLAG_SET_DEFAULT(UseAdaptiveSizePolicy, false); } - if (FLAG_IS_DEFAULT(UseParNewGC) && ParallelGCThreads > 1) { - FLAG_SET_DEFAULT(UseParNewGC, true); - } else if (UseParNewGC && ParallelGCThreads == 0) { + if (ParallelGCThreads == 0) { FLAG_SET_DEFAULT(ParallelGCThreads, Abstract_VM_Version::parallel_worker_threads()); if (FLAG_IS_DEFAULT(ParallelGCThreads) && ParallelGCThreads == 1) { @@ -969,15 +968,12 @@ // further optimization and tuning efforts, and would almost // certainly gain from analysis of platform and environment. void Arguments::set_cms_and_parnew_gc_flags() { - if (UseSerialGC || UseParallelGC) { - return; - } - + assert(!UseSerialGC && !UseParallelGC, "Error"); assert(UseConcMarkSweepGC, "CMS is expected to be on here"); // If we are using CMS, we prefer to UseParNewGC, // unless explicitly forbidden. - if (!UseParNewGC && FLAG_IS_DEFAULT(UseParNewGC)) { + if (FLAG_IS_DEFAULT(UseParNewGC)) { FLAG_SET_ERGO(bool, UseParNewGC, true); } @@ -1157,6 +1153,7 @@ // machine class and automatic selection policy. if (!UseSerialGC && !UseConcMarkSweepGC && + !UseG1GC && !UseParNewGC && !DumpSharedSpaces && FLAG_IS_DEFAULT(UseParallelGC)) { @@ -1174,7 +1171,7 @@ // field offset to determine free list chunk markers. // Check that UseCompressedOops can be set with the max heap size allocated // by ergonomics. - if (!UseConcMarkSweepGC && MaxHeapSize <= max_heap_for_compressed_oops()) { + if (!UseG1GC && !UseConcMarkSweepGC && MaxHeapSize <= max_heap_for_compressed_oops()) { if (FLAG_IS_DEFAULT(UseCompressedOops)) { FLAG_SET_ERGO(bool, UseCompressedOops, true); } @@ -1183,6 +1180,8 @@ // If specified, give a warning if (UseConcMarkSweepGC){ warning("Compressed Oops does not work with CMS"); + } else if (UseG1GC) { + warning("Compressed Oops does not work with UseG1GC"); } else { warning( "Max heap size too large for Compressed Oops"); @@ -1196,6 +1195,7 @@ } void Arguments::set_parallel_gc_flags() { + assert(UseParallelGC || UseParallelOldGC, "Error"); // If parallel old was requested, automatically enable parallel scavenge. if (UseParallelOldGC && !UseParallelGC && FLAG_IS_DEFAULT(UseParallelGC)) { FLAG_SET_DEFAULT(UseParallelGC, true); @@ -1207,51 +1207,8 @@ FLAG_SET_ERGO(uintx, ParallelGCThreads, Abstract_VM_Version::parallel_worker_threads()); - if (FLAG_IS_DEFAULT(MaxHeapSize)) { - const uint64_t reasonable_fraction = - os::physical_memory() / DefaultMaxRAMFraction; - const uint64_t maximum_size = (uint64_t) - (FLAG_IS_DEFAULT(DefaultMaxRAM) && UseCompressedOops ? - MIN2(max_heap_for_compressed_oops(), DefaultMaxRAM) : - DefaultMaxRAM); - size_t reasonable_max = - (size_t) os::allocatable_physical_memory(reasonable_fraction); - if (reasonable_max > maximum_size) { - reasonable_max = maximum_size; - } - if (PrintGCDetails && Verbose) { - // Cannot use gclog_or_tty yet. - tty->print_cr(" Max heap size for server class platform " - SIZE_FORMAT, reasonable_max); - } - // If the initial_heap_size has not been set with -Xms, - // then set it as fraction of size of physical memory - // respecting the maximum and minimum sizes of the heap. 
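The asserts added above encode the control-point invariant that at most one collector selection path is taken once UseG1GC is part of the picture, and the verify_serial_gc_flags change further below rejects UseG1GC as well. A toy version of that kind of mutual-exclusion check, with a stand-in flag struct instead of the real globals:

// Sketch of a collector-flag consistency check; the struct below stands in
// for the real UseSerialGC/UseParallelGC/UseConcMarkSweepGC/UseG1GC globals.
#include <cstdio>

struct GcFlags {
  bool use_serial;
  bool use_parallel;
  bool use_cms;
  bool use_g1;
};

// Returns true when at most one collector has been selected.
static bool collectors_consistent(const GcFlags& f) {
  int selected = (f.use_serial ? 1 : 0) + (f.use_parallel ? 1 : 0) +
                 (f.use_cms ? 1 : 0) + (f.use_g1 ? 1 : 0);
  return selected <= 1;
}

int main() {
  GcFlags g1_only    = {false, false, false, true};
  GcFlags g1_and_cms = {false, false, true,  true};
  std::printf("g1 only: %s, g1+cms: %s\n",
              collectors_consistent(g1_only)    ? "ok" : "inconsistent",
              collectors_consistent(g1_and_cms) ? "ok" : "inconsistent");
  return 0;
}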
- if (initial_heap_size() == 0) { - const uint64_t reasonable_initial_fraction = - os::physical_memory() / DefaultInitialRAMFraction; - const size_t reasonable_initial = - (size_t) os::allocatable_physical_memory(reasonable_initial_fraction); - const size_t minimum_size = NewSize + OldSize; - set_initial_heap_size(MAX2(MIN2(reasonable_initial, reasonable_max), - minimum_size)); - // Currently the minimum size and the initial heap sizes are the same. - set_min_heap_size(initial_heap_size()); - if (PrintGCDetails && Verbose) { - // Cannot use gclog_or_tty yet. - tty->print_cr(" Initial heap size for server class platform " - SIZE_FORMAT, initial_heap_size()); - } - } else { - // An minimum size was specified on the command line. Be sure - // that the maximum size is consistent. - if (initial_heap_size() > reasonable_max) { - reasonable_max = initial_heap_size(); - } - } - FLAG_SET_ERGO(uintx, MaxHeapSize, (uintx) reasonable_max); - } - + // PS is a server collector, setup the heap sizes accordingly. + set_server_heap_size(); // If InitialSurvivorRatio or MinSurvivorRatio were not specified, but the // SurvivorRatio has been set, reset their default values to SurvivorRatio + // 2. By doing this we make SurvivorRatio also work for Parallel Scavenger. @@ -1279,6 +1236,70 @@ } } +void Arguments::set_g1_gc_flags() { + assert(UseG1GC, "Error"); + // G1 is a server collector, setup the heap sizes accordingly. + set_server_heap_size(); +#ifdef COMPILER1 + FastTLABRefill = false; +#endif + FLAG_SET_DEFAULT(ParallelGCThreads, + Abstract_VM_Version::parallel_worker_threads()); + if (ParallelGCThreads == 0) { + FLAG_SET_DEFAULT(ParallelGCThreads, + Abstract_VM_Version::parallel_worker_threads +()); + } + no_shared_spaces(); +} + +void Arguments::set_server_heap_size() { + if (FLAG_IS_DEFAULT(MaxHeapSize)) { + const uint64_t reasonable_fraction = + os::physical_memory() / DefaultMaxRAMFraction; + const uint64_t maximum_size = (uint64_t) + (FLAG_IS_DEFAULT(DefaultMaxRAM) && UseCompressedOops ? + MIN2(max_heap_for_compressed_oops(), DefaultMaxRAM) : + DefaultMaxRAM); + size_t reasonable_max = + (size_t) os::allocatable_physical_memory(reasonable_fraction); + if (reasonable_max > maximum_size) { + reasonable_max = maximum_size; + } + if (PrintGCDetails && Verbose) { + // Cannot use gclog_or_tty yet. + tty->print_cr(" Max heap size for server class platform " + SIZE_FORMAT, reasonable_max); + } + // If the initial_heap_size has not been set with -Xms, + // then set it as fraction of size of physical memory + // respecting the maximum and minimum sizes of the heap. + if (initial_heap_size() == 0) { + const uint64_t reasonable_initial_fraction = + os::physical_memory() / DefaultInitialRAMFraction; + const size_t reasonable_initial = + (size_t) os::allocatable_physical_memory(reasonable_initial_fraction); + const size_t minimum_size = NewSize + OldSize; + set_initial_heap_size(MAX2(MIN2(reasonable_initial, reasonable_max), + minimum_size)); + // Currently the minimum size and the initial heap sizes are the same. + set_min_heap_size(initial_heap_size()); + if (PrintGCDetails && Verbose) { + // Cannot use gclog_or_tty yet. + tty->print_cr(" Initial heap size for server class platform " + SIZE_FORMAT, initial_heap_size()); + } + } else { + // A minimum size was specified on the command line. Be sure + // that the maximum size is consistent. 
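set_server_heap_size() above factors the sizing ergonomics out of the parallel-collector path so that G1 gets the same treatment: the maximum heap is a fraction of physical memory capped by a platform limit, and the initial heap is a smaller fraction clamped into that range. A sketch of that arithmetic with example inputs (the fractions and caps below are illustrative, not the real DefaultMaxRAMFraction/DefaultInitialRAMFraction defaults):

// Sketch of server-class heap sizing: max heap from one fraction of RAM
// (capped), initial heap from another fraction (clamped into [minimum, max]).
#include <algorithm>
#include <cstdint>
#include <cstdio>

struct ServerHeapSizes {
  uint64_t max_bytes;
  uint64_t initial_bytes;
};

static ServerHeapSizes server_heap_sizes(uint64_t physical_memory,
                                         uint64_t max_ram_cap,
                                         unsigned max_fraction,
                                         unsigned initial_fraction,
                                         uint64_t minimum_bytes) {
  uint64_t reasonable_max = std::min(physical_memory / max_fraction, max_ram_cap);
  uint64_t reasonable_initial = physical_memory / initial_fraction;
  reasonable_initial = std::max(std::min(reasonable_initial, reasonable_max),
                                minimum_bytes);
  return ServerHeapSizes{reasonable_max, reasonable_initial};
}

int main() {
  const uint64_t GB = 1024ull * 1024 * 1024;
  // 32 GB machine, 1/4 of RAM for the max heap, 1/64 for the initial heap,
  // capped at 8 GB and floored at 64 MB; all of these are example numbers.
  ServerHeapSizes s = server_heap_sizes(32 * GB, 8 * GB, 4, 64, 64 * 1024 * 1024);
  std::printf("max = %llu MB, initial = %llu MB\n",
              (unsigned long long)(s.max_bytes >> 20),
              (unsigned long long)(s.initial_bytes >> 20));  // 8192 MB, 512 MB
  return 0;
}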
+ if (initial_heap_size() > reasonable_max) { + reasonable_max = initial_heap_size(); + } + } + FLAG_SET_ERGO(uintx, MaxHeapSize, (uintx) reasonable_max); + } +} + // This must be called after ergonomics because we want bytecode rewriting // if the server compiler is used, or if UseSharedSpaces is disabled. void Arguments::set_bytecode_flags() { @@ -1362,12 +1383,13 @@ FLAG_SET_DEFAULT(UseConcMarkSweepGC, false); FLAG_SET_DEFAULT(UseParallelGC, false); FLAG_SET_DEFAULT(UseParallelOldGC, false); + FLAG_SET_DEFAULT(UseG1GC, false); } static bool verify_serial_gc_flags() { return (UseSerialGC && - !(UseParNewGC || UseConcMarkSweepGC || UseParallelGC || - UseParallelOldGC)); + !(UseParNewGC || UseConcMarkSweepGC || UseG1GC || + UseParallelGC || UseParallelOldGC)); } // Check consistency of GC selection @@ -1470,8 +1492,8 @@ status = status && verify_percentage(GCHeapFreeLimit, "GCHeapFreeLimit"); // Check user specified sharing option conflict with Parallel GC - bool cannot_share = (UseConcMarkSweepGC || UseParallelGC || - UseParallelOldGC || UseParNewGC || + bool cannot_share = (UseConcMarkSweepGC || UseG1GC || UseParNewGC || + UseParallelGC || UseParallelOldGC || SOLARIS_ONLY(UseISM) NOT_SOLARIS(UseLargePages)); if (cannot_share) { @@ -1511,11 +1533,6 @@ "The CMS collector (-XX:+UseConcMarkSweepGC) must be " "selected in order\nto use CMSIncrementalMode.\n"); status = false; - } else if (!UseTLAB) { - jio_fprintf(defaultStream::error_stream(), - "error: CMSIncrementalMode requires thread-local " - "allocation buffers\n(-XX:+UseTLAB).\n"); - status = false; } else { status = status && verify_percentage(CMSIncrementalDutyCycle, "CMSIncrementalDutyCycle"); @@ -1535,13 +1552,6 @@ } } - if (UseNUMA && !UseTLAB) { - jio_fprintf(defaultStream::error_stream(), - "error: NUMA allocator (-XX:+UseNUMA) requires thread-local " - "allocation\nbuffers (-XX:+UseTLAB).\n"); - status = false; - } - // CMS space iteration, which FLSVerifyAllHeapreferences entails, // insists that we hold the requisite locks so that the iteration is // MT-safe. For the verification at start-up and shut-down, we don't @@ -2330,10 +2340,15 @@ SOLARIS_ONLY(FLAG_SET_DEFAULT(UseMPSS, false)); SOLARIS_ONLY(FLAG_SET_DEFAULT(UseISM, false)); } + #else if (!FLAG_IS_DEFAULT(OptoLoopAlignment) && FLAG_IS_DEFAULT(MaxLoopPad)) { FLAG_SET_DEFAULT(MaxLoopPad, OptoLoopAlignment-1); } + // Temporary disable bulk zeroing reduction with G1. See CR 6627983. + if (UseG1GC) { + FLAG_SET_DEFAULT(ReduceBulkZeroing, false); + } #endif if (!check_vm_args_consistency()) { @@ -2485,12 +2500,29 @@ } } + // Parse JavaVMInitArgs structure passed in, as well as JAVA_TOOL_OPTIONS and _JAVA_OPTIONS jint result = parse_vm_init_args(args); if (result != JNI_OK) { return result; } + // These are hacks until G1 is fully supported and tested + // but lets you force -XX:+UseG1GC in PRT and get it where it (mostly) works + if (UseG1GC) { + if (UseConcMarkSweepGC || UseParNewGC || UseParallelGC || UseParallelOldGC || UseSerialGC) { +#ifndef PRODUCT + tty->print_cr("-XX:+UseG1GC is incompatible with other collectors, using UseG1GC"); +#endif // PRODUCT + UseConcMarkSweepGC = false; + UseParNewGC = false; + UseParallelGC = false; + UseParallelOldGC = false; + UseSerialGC = false; + } + no_shared_spaces(); + } + #ifndef PRODUCT if (TraceBytecodesAt != 0) { TraceBytecodes = true; @@ -2536,6 +2568,12 @@ // Set some flags for ParNew set_parnew_gc_flags(); } + // Temporary; make the "if" an "else-if" before + // we integrate G1. 
XXX + if (UseG1GC) { + // Set some flags for garbage-first, if needed. + set_g1_gc_flags(); + } #ifdef SERIALGC assert(verify_serial_gc_flags(), "SerialGC unset"); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/arguments.hpp --- a/src/share/vm/runtime/arguments.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/arguments.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -294,10 +294,14 @@ // CMS/ParNew garbage collectors static void set_parnew_gc_flags(); static void set_cms_and_parnew_gc_flags(); - // UseParallelGC + // UseParallel[Old]GC static void set_parallel_gc_flags(); + // Garbage-First (UseG1GC) + static void set_g1_gc_flags(); // GC ergonomics static void set_ergonomics_flags(); + // Setup heap size for a server platform + static void set_server_heap_size(); // Based on automatic selection criteria, should the // low pause collector be used. static bool should_auto_select_low_pause_collector(); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/globals.cpp --- a/src/share/vm/runtime/globals.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/globals.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -174,6 +174,9 @@ static Flag flagTable[] = { RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT, RUNTIME_MANAGEABLE_FLAG_STRUCT, RUNTIME_PRODUCT_RW_FLAG_STRUCT, RUNTIME_LP64_PRODUCT_FLAG_STRUCT) RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT) +#ifndef SERIALGC + G1_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT, RUNTIME_MANAGEABLE_FLAG_STRUCT, RUNTIME_PRODUCT_RW_FLAG_STRUCT) +#endif // SERIALGC #ifdef COMPILER1 C1_FLAGS(C1_DEVELOP_FLAG_STRUCT, C1_PD_DEVELOP_FLAG_STRUCT, C1_PRODUCT_FLAG_STRUCT, C1_PD_PRODUCT_FLAG_STRUCT, C1_NOTPRODUCT_FLAG_STRUCT) #endif diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/globals.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -1106,7 +1106,10 @@ /* gc */ \ \ product(bool, UseSerialGC, false, \ - "Tells whether the VM should use serial garbage collector") \ + "Use the serial garbage collector") \ + \ + product(bool, UseG1GC, false, \ + "Use the Garbage-First garbage collector") \ \ product(bool, UseParallelGC, false, \ "Use the Parallel Scavenge garbage collector") \ @@ -1169,6 +1172,9 @@ develop(bool, TraceChunkTasksQueuing, false, \ "Trace the queuing of the chunk tasks") \ \ + product(uintx, ParallelMarkingThreads, 0, \ + "Number of marking threads concurrent gc will use") \ + \ product(uintx, YoungPLABSize, 4096, \ "Size of young gen promotion labs (in HeapWords)") \ \ @@ -1265,6 +1271,12 @@ "The amount of young gen chosen by default per GC worker " \ "thread available ") \ \ + product(bool, GCOverheadReporting, false, \ + "Enables the GC overhead reporting facility") \ + \ + product(intx, GCOverheadReportingPeriodMS, 100, \ + "Reporting period for conc GC overhead reporting, in ms ") \ + \ product(bool, CMSIncrementalMode, false, \ "Whether CMS GC should operate in \"incremental\" mode") \ \ @@ -1593,6 +1605,9 @@ product(bool, ZeroTLAB, false, \ "Zero out the newly created TLAB") \ \ + 
product(bool, FastTLABRefill, true, \ + "Use fast TLAB refill code") \ + \ product(bool, PrintTLAB, false, \ "Print various TLAB related information") \ \ @@ -2790,6 +2805,12 @@ "how many entries we'll try to leave on the stack during " \ "parallel GC") \ \ + product(intx, DCQBarrierQueueBufferSize, 256, \ + "Number of elements in a dirty card queue buffer") \ + \ + product(intx, DCQBarrierProcessCompletedThreshold, 5, \ + "Number of completed dirty card buffers to trigger processing.") \ + \ /* stack parameters */ \ product_pd(intx, StackYellowPages, \ "Number of yellow zone (recoverable overflows) pages") \ diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/globals_extension.hpp --- a/src/share/vm/runtime/globals_extension.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/globals_extension.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -74,21 +74,16 @@ #endif typedef enum { - RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, - RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, - RUNTIME_NOTPRODUCT_FLAG_MEMBER, RUNTIME_MANAGEABLE_FLAG_MEMBER, - RUNTIME_PRODUCT_RW_FLAG_MEMBER, - RUNTIME_LP64_PRODUCT_FLAG_MEMBER) - RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, - RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, - RUNTIME_NOTPRODUCT_FLAG_MEMBER) + RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER, RUNTIME_MANAGEABLE_FLAG_MEMBER, RUNTIME_PRODUCT_RW_FLAG_MEMBER, RUNTIME_LP64_PRODUCT_FLAG_MEMBER) + RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER) +#ifndef KERNEL + G1_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER, RUNTIME_MANAGEABLE_FLAG_MEMBER, RUNTIME_PRODUCT_RW_FLAG_MEMBER) +#endif #ifdef COMPILER1 - C1_FLAGS(C1_DEVELOP_FLAG_MEMBER, C1_PD_DEVELOP_FLAG_MEMBER, C1_PRODUCT_FLAG_MEMBER, C1_PD_PRODUCT_FLAG_MEMBER, - C1_NOTPRODUCT_FLAG_MEMBER) + C1_FLAGS(C1_DEVELOP_FLAG_MEMBER, C1_PD_DEVELOP_FLAG_MEMBER, C1_PRODUCT_FLAG_MEMBER, C1_PD_PRODUCT_FLAG_MEMBER, C1_NOTPRODUCT_FLAG_MEMBER) #endif #ifdef COMPILER2 - C2_FLAGS(C2_DEVELOP_FLAG_MEMBER, C2_PD_DEVELOP_FLAG_MEMBER, C2_PRODUCT_FLAG_MEMBER, C2_PD_PRODUCT_FLAG_MEMBER, - C2_DIAGNOSTIC_FLAG_MEMBER, C2_NOTPRODUCT_FLAG_MEMBER) + C2_FLAGS(C2_DEVELOP_FLAG_MEMBER, C2_PD_DEVELOP_FLAG_MEMBER, C2_PRODUCT_FLAG_MEMBER, C2_PD_PRODUCT_FLAG_MEMBER, C2_DIAGNOSTIC_FLAG_MEMBER, C2_NOTPRODUCT_FLAG_MEMBER) #endif NUM_CommandLineFlag } CommandLineFlag; @@ -144,24 +139,45 @@ #endif typedef enum { - RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, - RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE, RUNTIME_MANAGEABLE_FLAG_MEMBER_WITH_TYPE, RUNTIME_PRODUCT_RW_FLAG_MEMBER_WITH_TYPE, RUNTIME_LP64_PRODUCT_FLAG_MEMBER_WITH_TYPE) 
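// Illustrative sketch (not code from this changeset): the G1_FLAGS hunks here
// splice the new collector's flags into flagTable[] in globals.cpp and into the
// CommandLineFlag/CommandLineFlagWithType enums in globals_extension.hpp using
// the same X-macro pattern as RUNTIME_FLAGS, C1_FLAGS and C2_FLAGS. The minimal
// stand-in below shows how one flag list can be expanded both into variable
// definitions and into an enum of flag ids; the macro and flag names
// (DEMO_FLAGS, DECLARE_FLAG, FLAG_MEMBER, *_demo) are invented for the example
// and are not the actual HotSpot macros.

#include <stddef.h>

// The flag list is written once, parameterized by the expansion macros.
#define DEMO_FLAGS(develop, product)                         \
  product(bool,   UseG1GC_demo,                false)        \
  product(size_t, ParallelMarkingThreads_demo, 0)            \
  develop(bool,   TraceDemoMarking,            false)

// Expansion 1: define the flag variables themselves (cf. globals.cpp).
#define DECLARE_FLAG(type, name, value) type name = value;
DEMO_FLAGS(DECLARE_FLAG, DECLARE_FLAG)

// Expansion 2: build an enum with one member per flag
// (cf. CommandLineFlag in globals_extension.hpp).
#define FLAG_MEMBER(type, name, value) FLAG_MEMBER_##name,
enum DemoCommandLineFlag {
  DEMO_FLAGS(FLAG_MEMBER, FLAG_MEMBER)
  NUM_DemoCommandLineFlag
};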
-RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, - RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, - RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, - RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) + RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, + RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) +#ifndef KERNEL + G1_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, + RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_MANAGEABLE_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PRODUCT_RW_FLAG_MEMBER_WITH_TYPE) +#endif // KERNEL #ifdef COMPILER1 - C1_FLAGS(C1_DEVELOP_FLAG_MEMBER_WITH_TYPE, C1_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, C1_PRODUCT_FLAG_MEMBER_WITH_TYPE, - C1_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, C1_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) + C1_FLAGS(C1_DEVELOP_FLAG_MEMBER_WITH_TYPE, + C1_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, + C1_PRODUCT_FLAG_MEMBER_WITH_TYPE, + C1_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, + C1_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) #endif #ifdef COMPILER2 - C2_FLAGS(C2_DEVELOP_FLAG_MEMBER_WITH_TYPE, C2_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, C2_PRODUCT_FLAG_MEMBER_WITH_TYPE, - C2_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, C2_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, C2_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) + C2_FLAGS(C2_DEVELOP_FLAG_MEMBER_WITH_TYPE, + C2_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, + C2_PRODUCT_FLAG_MEMBER_WITH_TYPE, + C2_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, + C2_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, + C2_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) #endif NUM_CommandLineFlagWithType } CommandLineFlagWithType; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/mutexLocker.cpp --- a/src/share/vm/runtime/mutexLocker.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/mutexLocker.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -47,7 +47,7 @@ Monitor* JNICritical_lock = NULL; Mutex* JvmtiThreadState_lock = NULL; Monitor* JvmtiPendingEvent_lock = NULL; -Mutex* Heap_lock = NULL; +Monitor* Heap_lock = NULL; Mutex* ExpandHeap_lock = NULL; Mutex* AdapterHandlerLibrary_lock = NULL; Mutex* SignatureHandlerLibrary_lock = NULL; @@ -67,7 +67,18 @@ Monitor* SLT_lock = NULL; Monitor* iCMS_lock = NULL; Monitor* FullGCCount_lock = NULL; +Monitor* CMark_lock = NULL; +Monitor* ZF_mon = NULL; +Monitor* Cleanup_mon = NULL; +Monitor* G1ConcRefine_mon = NULL; +Mutex* SATB_Q_FL_lock = NULL; +Monitor* SATB_Q_CBL_mon = NULL; +Mutex* Shared_SATB_Q_lock = NULL; +Mutex* DirtyCardQ_FL_lock = NULL; +Monitor* DirtyCardQ_CBL_mon = NULL; +Mutex* Shared_DirtyCardQ_lock = NULL; Mutex* ParGCRareEvent_lock = NULL; +Mutex* EvacFailureStack_lock = NULL; Mutex* DerivedPointerTableGC_lock = NULL; Mutex* Compile_lock = NULL; Monitor* MethodCompileQueue_lock = NULL; @@ -102,6 +113,9 @@ Mutex* PerfDataManager_lock = NULL; Mutex* OopMapCacheAlloc_lock = NULL; +Mutex* MMUTracker_lock = NULL; +Mutex* HotCardCache_lock = NULL; + Monitor* GCTaskManager_lock = NULL; Mutex* Management_lock = NULL; @@ -150,6 +164,23 @@ def(iCMS_lock , Monitor, special, true ); // CMS incremental mode start/stop notification def(FullGCCount_lock , Monitor, leaf, true ); // in support of ExplicitGCInvokesConcurrent } + if (UseG1GC) { + def(CMark_lock , Monitor, nonleaf, true ); // coordinate concurrent 
mark thread + def(ZF_mon , Monitor, leaf, true ); + def(Cleanup_mon , Monitor, nonleaf, true ); + def(G1ConcRefine_mon , Monitor, nonleaf, true ); + def(SATB_Q_FL_lock , Mutex , special, true ); + def(SATB_Q_CBL_mon , Monitor, nonleaf, true ); + def(Shared_SATB_Q_lock , Mutex, nonleaf, true ); + + def(DirtyCardQ_FL_lock , Mutex , special, true ); + def(DirtyCardQ_CBL_mon , Monitor, nonleaf, true ); + def(Shared_DirtyCardQ_lock , Mutex, nonleaf, true ); + + def(MMUTracker_lock , Mutex , leaf , true ); + def(HotCardCache_lock , Mutex , special , true ); + def(EvacFailureStack_lock , Mutex , nonleaf , true ); + } def(ParGCRareEvent_lock , Mutex , leaf , true ); def(DerivedPointerTableGC_lock , Mutex, leaf, true ); def(CodeCache_lock , Mutex , special, true ); @@ -203,7 +234,7 @@ def(SLT_lock , Monitor, nonleaf, false ); // used in CMS GC for locking PLL lock } - def(Heap_lock , Mutex , nonleaf+1, false); + def(Heap_lock , Monitor, nonleaf+1, false); def(JfieldIdCreation_lock , Mutex , nonleaf+1, true ); // jfieldID, Used in VM_Operation def(JNICachedItableIndex_lock , Mutex , nonleaf+1, false); // Used to cache an itable index during JNI invoke diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/mutexLocker.hpp --- a/src/share/vm/runtime/mutexLocker.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/mutexLocker.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -38,7 +38,7 @@ extern Monitor* JNICritical_lock; // a lock used while entering and exiting JNI critical regions, allows GC to sometimes get in extern Mutex* JvmtiThreadState_lock; // a lock on modification of JVMTI thread data extern Monitor* JvmtiPendingEvent_lock; // a lock on the JVMTI pending events list -extern Mutex* Heap_lock; // a lock on the heap +extern Monitor* Heap_lock; // a lock on the heap extern Mutex* ExpandHeap_lock; // a lock on expanding the heap extern Mutex* AdapterHandlerLibrary_lock; // a lock on the AdapterHandlerLibrary extern Mutex* SignatureHandlerLibrary_lock; // a lock on the SignatureHandlerLibrary @@ -60,8 +60,30 @@ extern Monitor* SLT_lock; // used in CMS GC for acquiring PLL extern Monitor* iCMS_lock; // CMS incremental mode start/stop notification extern Monitor* FullGCCount_lock; // in support of "concurrent" full gc +extern Monitor* CMark_lock; // used for concurrent mark thread coordination +extern Monitor* ZF_mon; // used for G1 conc zero-fill. +extern Monitor* Cleanup_mon; // used for G1 conc cleanup. +extern Monitor* G1ConcRefine_mon; // used for G1 conc-refine + // coordination. + +extern Mutex* SATB_Q_FL_lock; // Protects SATB Q + // buffer free list. +extern Monitor* SATB_Q_CBL_mon; // Protects SATB Q + // completed buffer queue. +extern Mutex* Shared_SATB_Q_lock; // Lock protecting SATB + // queue shared by + // non-Java threads. + +extern Mutex* DirtyCardQ_FL_lock; // Protects dirty card Q + // buffer free list. +extern Monitor* DirtyCardQ_CBL_mon; // Protects dirty card Q + // completed buffer queue. +extern Mutex* Shared_DirtyCardQ_lock; // Lock protecting dirty card + // queue shared by + // non-Java threads. // (see option ExplicitGCInvokesConcurrent) extern Mutex* ParGCRareEvent_lock; // Synchronizes various (rare) parallel GC ops. 
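// Illustrative sketch (not code from this changeset): SATB_Q_CBL_mon and
// DirtyCardQ_CBL_mon, defined above, guard lists of "completed" buffers handed
// from mutator threads to the concurrent G1 threads. The hedged example below
// shows the intended handshake using the existing HotSpot MutexLockerEx and
// Monitor primitives; push_completed_buffer(), have_completed_buffer() and
// pop_completed_buffer() are hypothetical placeholders for the queue-set
// bookkeeping, not functions from this patch.

// Producer side: a Java thread retires a full SATB buffer.
void example_enqueue_completed_buffer(void* buf) {
  MutexLockerEx x(SATB_Q_CBL_mon, Mutex::_no_safepoint_check_flag);
  push_completed_buffer(buf);      // hypothetical: append to the shared list
  SATB_Q_CBL_mon->notify_all();    // wake the concurrent mark thread
}

// Consumer side: the concurrent mark thread blocks until work arrives.
void* example_take_completed_buffer() {
  MutexLockerEx x(SATB_Q_CBL_mon, Mutex::_no_safepoint_check_flag);
  while (!have_completed_buffer()) {                   // hypothetical predicate
    SATB_Q_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
  }
  return pop_completed_buffer();   // hypothetical: detach one buffer
}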
+extern Mutex* EvacFailureStack_lock; // guards the evac failure scan stack extern Mutex* Compile_lock; // a lock held when Compilation is updating code (used to block CodeCache traversal, CHA updates, etc) extern Monitor* MethodCompileQueue_lock; // a lock held when method compilations are enqueued, dequeued #ifdef TIERED @@ -93,6 +115,10 @@ extern Mutex* ParkerFreeList_lock; extern Mutex* OopMapCacheAlloc_lock; // protects allocation of oop_map caches +extern Mutex* MMUTracker_lock; // protects the MMU + // tracker data structures +extern Mutex* HotCardCache_lock; // protects the hot card cache + extern Mutex* Management_lock; // a lock used to serialize JVM management extern Monitor* LowMemory_lock; // a lock used for low memory detection diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/os.hpp --- a/src/share/vm/runtime/os.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/os.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -105,6 +105,18 @@ static jlong elapsed_counter(); static jlong elapsed_frequency(); + // The "virtual time" of a thread is the amount of time a thread has + // actually run. The first function indicates whether the OS supports + // this functionality for the current thread, and if so: + // * the second enables vtime tracking (if that is required). + // * the third tells whether vtime is enabled. + // * the fourth returns the elapsed virtual time for the current + // thread. + static bool supports_vtime(); + static bool enable_vtime(); + static bool vtime_enabled(); + static double elapsedVTime(); + // Return current local time in a string (YYYY-MM-DD HH:MM:SS). // It is MT safe, but not async-safe, as reading time zone // information may require a lock on some platforms. diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/sharedRuntime.cpp --- a/src/share/vm/runtime/sharedRuntime.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/sharedRuntime.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -111,6 +111,25 @@ } #endif // PRODUCT +#ifndef SERIALGC + +// G1 write-barrier pre: executed before a pointer store. +JRT_LEAF(void, SharedRuntime::g1_wb_pre(oopDesc* orig, JavaThread *thread)) + if (orig == NULL) { + assert(false, "should be optimized out"); + return; + } + // store the original value that was in the field reference + thread->satb_mark_queue().enqueue(orig); +JRT_END + +// G1 write-barrier post: executed after a pointer store. 
+JRT_LEAF(void, SharedRuntime::g1_wb_post(void* card_addr, JavaThread* thread)) + thread->dirty_card_queue().enqueue(card_addr); +JRT_END + +#endif // !SERIALGC + JRT_LEAF(jlong, SharedRuntime::lmul(jlong y, jlong x)) return x * y; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/sharedRuntime.hpp --- a/src/share/vm/runtime/sharedRuntime.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/sharedRuntime.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -99,6 +99,12 @@ static address raw_exception_handler_for_return_address(address return_address); static address exception_handler_for_return_address(address return_address); +#ifndef SERIALGC + // G1 write barriers + static void g1_wb_pre(oopDesc* orig, JavaThread *thread); + static void g1_wb_post(void* card_addr, JavaThread* thread); +#endif // !SERIALGC + // exception handling and implicit exceptions static address compute_compiled_exc_handler(nmethod* nm, address ret_pc, Handle& exception, bool force_unwind, bool top_frame_only); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/task.cpp --- a/src/share/vm/runtime/task.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/task.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -67,7 +67,6 @@ PeriodicTask::PeriodicTask(size_t interval_time) : _counter(0), _interval(interval_time) { - assert(is_init_completed(), "Periodic tasks should not start during VM initialization"); // Sanity check the interval time assert(_interval >= PeriodicTask::min_interval && _interval <= PeriodicTask::max_interval && diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/thread.cpp --- a/src/share/vm/runtime/thread.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/thread.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1138,6 +1138,10 @@ void JavaThread::initialize() { // Initialize fields + + // Set the claimed par_id to -1 (ie not claiming any par_ids) + set_claimed_par_id(-1); + set_saved_exception_pc(NULL); set_threadObj(NULL); _anchor.clear(); @@ -1209,7 +1213,18 @@ pd_initialize(); } -JavaThread::JavaThread(bool is_attaching) : Thread() { +#ifndef SERIALGC +SATBMarkQueueSet JavaThread::_satb_mark_queue_set; +DirtyCardQueueSet JavaThread::_dirty_card_queue_set; +#endif // !SERIALGC + +JavaThread::JavaThread(bool is_attaching) : + Thread() +#ifndef SERIALGC + , _satb_mark_queue(&_satb_mark_queue_set), + _dirty_card_queue(&_dirty_card_queue_set) +#endif // !SERIALGC +{ initialize(); _is_attaching = is_attaching; } @@ -1255,7 +1270,13 @@ // Remove this ifdef when C1 is ported to the compiler interface. static void compiler_thread_entry(JavaThread* thread, TRAPS); -JavaThread::JavaThread(ThreadFunction entry_point, size_t stack_sz) : Thread() { +JavaThread::JavaThread(ThreadFunction entry_point, size_t stack_sz) : + Thread() +#ifndef SERIALGC + , _satb_mark_queue(&_satb_mark_queue_set), + _dirty_card_queue(&_dirty_card_queue_set) +#endif // !SERIALGC +{ if (TraceThreadEvents) { tty->print_cr("creating thread %p", this); } @@ -3034,9 +3055,14 @@ #ifndef SERIALGC // Support for ConcurrentMarkSweep. This should be cleaned up - // and better encapsulated. XXX YSR - if (UseConcMarkSweepGC) { - ConcurrentMarkSweepThread::makeSurrogateLockerThread(THREAD); + // and better encapsulated. The ugly nested if test would go away + // once things are properly refactored. 
XXX YSR + if (UseConcMarkSweepGC || UseG1GC) { + if (UseConcMarkSweepGC) { + ConcurrentMarkSweepThread::makeSurrogateLockerThread(THREAD); + } else { + ConcurrentMarkThread::makeSurrogateLockerThread(THREAD); + } if (HAS_PENDING_EXCEPTION) { vm_exit_during_initialization(Handle(THREAD, PENDING_EXCEPTION)); } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/thread.hpp --- a/src/share/vm/runtime/thread.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/thread.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -783,6 +783,18 @@ } _jmp_ring[ jump_ring_buffer_size ]; #endif /* PRODUCT */ +#ifndef SERIALGC + // Support for G1 barriers + + ObjPtrQueue _satb_mark_queue; // Thread-local log for SATB barrier. + // Set of all such queues. + static SATBMarkQueueSet _satb_mark_queue_set; + + DirtyCardQueue _dirty_card_queue; // Thread-local log for dirty cards. + // Set of all such queues. + static DirtyCardQueueSet _dirty_card_queue_set; +#endif // !SERIALGC + friend class VMThread; friend class ThreadWaitTransition; friend class VM_Exit; @@ -1168,6 +1180,11 @@ static ByteSize do_not_unlock_if_synchronized_offset() { return byte_offset_of(JavaThread, _do_not_unlock_if_synchronized); } +#ifndef SERIALGC + static ByteSize satb_mark_queue_offset() { return byte_offset_of(JavaThread, _satb_mark_queue); } + static ByteSize dirty_card_queue_offset() { return byte_offset_of(JavaThread, _dirty_card_queue); } +#endif // !SERIALGC + // Returns the jni environment for this thread JNIEnv* jni_environment() { return &_jni_environment; } @@ -1414,6 +1431,20 @@ _stack_size_at_create = value; } +#ifndef SERIALGC + // SATB marking queue support + ObjPtrQueue& satb_mark_queue() { return _satb_mark_queue; } + static SATBMarkQueueSet& satb_mark_queue_set() { + return _satb_mark_queue_set; + } + + // Dirty card queue support + DirtyCardQueue& dirty_card_queue() { return _dirty_card_queue; } + static DirtyCardQueueSet& dirty_card_queue_set() { + return _dirty_card_queue_set; + } +#endif // !SERIALGC + // Machine dependent stuff #include "incls/_thread_pd.hpp.incl" @@ -1445,6 +1476,14 @@ // clearing/querying jni attach status bool is_attaching() const { return _is_attaching; } void set_attached() { _is_attaching = false; OrderAccess::fence(); } +private: + // This field is used to determine if a thread has claimed + // a par_id: it is -1 if the thread has not claimed a par_id; + // otherwise its value is the par_id that has been claimed. 
+ int _claimed_par_id; +public: + int get_claimed_par_id() { return _claimed_par_id; } + void set_claimed_par_id(int id) { _claimed_par_id = id;} }; // Inline implementation of JavaThread::current diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/virtualspace.cpp --- a/src/share/vm/runtime/virtualspace.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/virtualspace.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -235,24 +235,16 @@ // increase size to a multiple of the desired alignment size = align_size_up(size, alignment); size_t extra_size = size + alignment; - char* extra_base = os::reserve_memory(extra_size, NULL, alignment); - if (extra_base == NULL) return; - // Do manual alignement - base = (char*) align_size_up((uintptr_t) extra_base, alignment); - assert(base >= extra_base, "just checking"); - // Release unused areas - size_t unused_bottom_size = base - extra_base; - size_t unused_top_size = extra_size - size - unused_bottom_size; - assert(unused_bottom_size % os::vm_allocation_granularity() == 0, - "size not allocation aligned"); - assert(unused_top_size % os::vm_allocation_granularity() == 0, - "size not allocation aligned"); - if (unused_bottom_size > 0) { - os::release_memory(extra_base, unused_bottom_size); - } - if (unused_top_size > 0) { - os::release_memory(base + size, unused_top_size); - } + do { + char* extra_base = os::reserve_memory(extra_size, NULL, alignment); + if (extra_base == NULL) return; + // Do manual alignement + base = (char*) align_size_up((uintptr_t) extra_base, alignment); + assert(base >= extra_base, "just checking"); + // Re-reserve the region at the aligned base address. + os::release_memory(extra_base, extra_size); + base = os::reserve_memory(size, base); + } while (base == NULL); } } // Done diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/vmStructs.cpp --- a/src/share/vm/runtime/vmStructs.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/vmStructs.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -302,7 +302,7 @@ nonstatic_field(CardTableModRefBS, _guard_region, MemRegion) \ nonstatic_field(CardTableModRefBS, byte_map_base, jbyte*) \ \ - nonstatic_field(CardTableRS, _ct_bs, CardTableModRefBS) \ + nonstatic_field(CardTableRS, _ct_bs, CardTableModRefBSForCTRS*) \ \ nonstatic_field(CollectedHeap, _reserved, MemRegion) \ nonstatic_field(SharedHeap, _perm_gen, PermGen*) \ @@ -993,6 +993,7 @@ declare_toplevel_type(BarrierSet) \ declare_type(ModRefBarrierSet, BarrierSet) \ declare_type(CardTableModRefBS, ModRefBarrierSet) \ + declare_type(CardTableModRefBSForCTRS, CardTableModRefBS) \ declare_toplevel_type(GenRemSet) \ declare_type(CardTableRS, GenRemSet) \ declare_toplevel_type(BlockOffsetSharedArray) \ @@ -1020,6 +1021,10 @@ declare_toplevel_type(BlockOffsetSharedArray*) \ declare_toplevel_type(GenRemSet*) \ declare_toplevel_type(CardTableRS*) \ + declare_toplevel_type(CardTableModRefBS*) \ + declare_toplevel_type(CardTableModRefBS**) \ + declare_toplevel_type(CardTableModRefBSForCTRS*) \ + declare_toplevel_type(CardTableModRefBSForCTRS**) \ declare_toplevel_type(CollectedHeap*) \ declare_toplevel_type(ContiguousSpace*) \ declare_toplevel_type(DefNewGeneration*) \ diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/vm_operations.hpp --- a/src/share/vm/runtime/vm_operations.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/vm_operations.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -53,8 +53,13 @@ template(ParallelGCFailedAllocation) \ template(ParallelGCFailedPermanentAllocation) \ template(ParallelGCSystemGC) \ + 
template(CGC_Operation) \ template(CMS_Initial_Mark) \ template(CMS_Final_Remark) \ + template(G1CollectFull) \ + template(G1CollectForAllocation) \ + template(G1IncCollectionPause) \ + template(G1PopRegionCollectionPause) \ template(EnableBiasedLocking) \ template(RevokeBias) \ template(BulkRevokeBias) \ diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/services/heapDumper.cpp --- a/src/share/vm/services/heapDumper.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/services/heapDumper.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1578,17 +1578,8 @@ } // Write the file header - use 1.0.2 for large heaps, otherwise 1.0.1 - size_t used; + size_t used = ch->used(); const char* header; -#ifndef SERIALGC - if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) { - used = GenCollectedHeap::heap()->used(); - } else { - used = ParallelScavengeHeap::heap()->used(); - } -#else // SERIALGC - used = GenCollectedHeap::heap()->used(); -#endif // SERIALGC if (used > (size_t)SegmentedHeapDumpThreshold) { set_segmented_dump(); header = "JAVA PROFILE 1.0.2"; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/services/memoryService.cpp --- a/src/share/vm/services/memoryService.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/services/memoryService.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -59,9 +59,13 @@ add_parallel_scavenge_heap_info(ParallelScavengeHeap::heap()); break; } + case CollectedHeap::G1CollectedHeap : { + G1CollectedHeap::g1_unimplemented(); + return; + } #endif // SERIALGC default: { - guarantee(false, "Not recognized kind of heap"); + guarantee(false, "Unrecognized kind of heap"); } } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/bitMap.cpp --- a/src/share/vm/utilities/bitMap.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/bitMap.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -26,54 +26,59 @@ # include "incls/_bitMap.cpp.incl" -BitMap::BitMap(idx_t* map, idx_t size_in_bits) { +BitMap::BitMap(bm_word_t* map, idx_t size_in_bits) : + _map(map), _size(size_in_bits) +{ + assert(sizeof(bm_word_t) == BytesPerWord, "Implementation assumption."); assert(size_in_bits >= 0, "just checking"); - _map = map; - _size = size_in_bits; } -BitMap::BitMap(idx_t size_in_bits) { - assert(size_in_bits >= 0, "just checking"); - _size = size_in_bits; - _map = NEW_RESOURCE_ARRAY(idx_t, size_in_words()); +BitMap::BitMap(idx_t size_in_bits, bool in_resource_area) : + _map(NULL), _size(0) +{ + assert(sizeof(bm_word_t) == BytesPerWord, "Implementation assumption."); + resize(size_in_bits, in_resource_area); } -void BitMap::resize(idx_t size_in_bits) { +void BitMap::verify_index(idx_t index) const { + assert(index < _size, "BitMap index out of bounds"); +} + +void BitMap::verify_range(idx_t beg_index, idx_t end_index) const { +#ifdef ASSERT + assert(beg_index <= end_index, "BitMap range error"); + // Note that [0,0) and [size,size) are both valid ranges. 
+ if (end_index != _size) verify_index(end_index); +#endif +} + +void BitMap::resize(idx_t size_in_bits, bool in_resource_area) { assert(size_in_bits >= 0, "just checking"); - size_t old_size_in_words = size_in_words(); - uintptr_t* old_map = map(); + idx_t old_size_in_words = size_in_words(); + bm_word_t* old_map = map(); + _size = size_in_bits; - size_t new_size_in_words = size_in_words(); - _map = NEW_RESOURCE_ARRAY(idx_t, new_size_in_words); - Copy::disjoint_words((HeapWord*) old_map, (HeapWord*) _map, MIN2(old_size_in_words, new_size_in_words)); + idx_t new_size_in_words = size_in_words(); + if (in_resource_area) { + _map = NEW_RESOURCE_ARRAY(bm_word_t, new_size_in_words); + } else { + if (old_map != NULL) FREE_C_HEAP_ARRAY(bm_word_t, _map); + _map = NEW_C_HEAP_ARRAY(bm_word_t, new_size_in_words); + } + Copy::disjoint_words((HeapWord*)old_map, (HeapWord*) _map, + MIN2(old_size_in_words, new_size_in_words)); if (new_size_in_words > old_size_in_words) { clear_range_of_words(old_size_in_words, size_in_words()); } } -// Returns a bit mask for a range of bits [beg, end) within a single word. Each -// bit in the mask is 0 if the bit is in the range, 1 if not in the range. The -// returned mask can be used directly to clear the range, or inverted to set the -// range. Note: end must not be 0. -inline BitMap::idx_t -BitMap::inverted_bit_mask_for_range(idx_t beg, idx_t end) const { - assert(end != 0, "does not work when end == 0"); - assert(beg == end || word_index(beg) == word_index(end - 1), - "must be a single-word range"); - idx_t mask = bit_mask(beg) - 1; // low (right) bits - if (bit_in_word(end) != 0) { - mask |= ~(bit_mask(end) - 1); // high (left) bits - } - return mask; -} - void BitMap::set_range_within_word(idx_t beg, idx_t end) { // With a valid range (beg <= end), this test ensures that end != 0, as // required by inverted_bit_mask_for_range. Also avoids an unnecessary write. if (beg != end) { - idx_t mask = inverted_bit_mask_for_range(beg, end); + bm_word_t mask = inverted_bit_mask_for_range(beg, end); *word_addr(beg) |= ~mask; } } @@ -82,7 +87,7 @@ // With a valid range (beg <= end), this test ensures that end != 0, as // required by inverted_bit_mask_for_range. Also avoids an unnecessary write. if (beg != end) { - idx_t mask = inverted_bit_mask_for_range(beg, end); + bm_word_t mask = inverted_bit_mask_for_range(beg, end); *word_addr(beg) &= mask; } } @@ -105,20 +110,6 @@ } } -inline void BitMap::set_large_range_of_words(idx_t beg, idx_t end) { - memset(_map + beg, ~(unsigned char)0, (end - beg) * sizeof(uintptr_t)); -} - -inline void BitMap::clear_large_range_of_words(idx_t beg, idx_t end) { - memset(_map + beg, 0, (end - beg) * sizeof(uintptr_t)); -} - -inline BitMap::idx_t BitMap::word_index_round_up(idx_t bit) const { - idx_t bit_rounded_up = bit + (BitsPerWord - 1); - // Check for integer arithmetic overflow. - return bit_rounded_up > bit ? word_index(bit_rounded_up) : size_in_words(); -} - void BitMap::set_range(idx_t beg, idx_t end) { verify_range(beg, end); @@ -187,6 +178,64 @@ clear_range_within_word(bit_index(end_full_word), end); } +void BitMap::mostly_disjoint_range_union(BitMap* from_bitmap, + idx_t from_start_index, + idx_t to_start_index, + size_t word_num) { + // Ensure that the parameters are correct. + // These shouldn't be that expensive to check, hence I left them as + // guarantees. 
+ guarantee(from_bitmap->bit_in_word(from_start_index) == 0, + "it should be aligned on a word boundary"); + guarantee(bit_in_word(to_start_index) == 0, + "it should be aligned on a word boundary"); + guarantee(word_num >= 2, "word_num should be at least 2"); + + intptr_t* from = (intptr_t*) from_bitmap->word_addr(from_start_index); + intptr_t* to = (intptr_t*) word_addr(to_start_index); + + if (*from != 0) { + // if it's 0, then there's no point in doing the CAS + while (true) { + intptr_t old_value = *to; + intptr_t new_value = old_value | *from; + intptr_t res = Atomic::cmpxchg_ptr(new_value, to, old_value); + if (res == old_value) break; + } + } + ++from; + ++to; + + for (size_t i = 0; i < word_num - 2; ++i) { + if (*from != 0) { + // if it's 0, then there's no point in doing the CAS + assert(*to == 0, "nobody else should be writing here"); + intptr_t new_value = *from; + *to = new_value; + } + + ++from; + ++to; + } + + if (*from != 0) { + // if it's 0, then there's no point in doing the CAS + while (true) { + intptr_t old_value = *to; + intptr_t new_value = old_value | *from; + intptr_t res = Atomic::cmpxchg_ptr(new_value, to, old_value); + if (res == old_value) break; + } + } + + // the -1 is because we didn't advance them after the final CAS + assert(from == + (intptr_t*) from_bitmap->word_addr(from_start_index) + word_num - 1, + "invariant"); + assert(to == (intptr_t*) word_addr(to_start_index) + word_num - 1, + "invariant"); +} + void BitMap::at_put(idx_t offset, bool value) { if (value) { set_bit(offset); @@ -282,11 +331,11 @@ bool BitMap::contains(const BitMap other) const { assert(size() == other.size(), "must have same size"); - uintptr_t* dest_map = map(); - uintptr_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size_in_words(); index++) { - uintptr_t word_union = dest_map[index] | other_map[index]; + bm_word_t word_union = dest_map[index] | other_map[index]; // If this has more bits set than dest_map[index], then other is not a // subset. 
if (word_union != dest_map[index]) return false; @@ -296,8 +345,8 @@ bool BitMap::intersects(const BitMap other) const { assert(size() == other.size(), "must have same size"); - uintptr_t* dest_map = map(); - uintptr_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size_in_words(); index++) { if ((dest_map[index] & other_map[index]) != 0) return true; @@ -308,8 +357,8 @@ void BitMap::set_union(BitMap other) { assert(size() == other.size(), "must have same size"); - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size_in_words(); index++) { dest_map[index] = dest_map[index] | other_map[index]; @@ -319,8 +368,8 @@ void BitMap::set_difference(BitMap other) { assert(size() == other.size(), "must have same size"); - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size_in_words(); index++) { dest_map[index] = dest_map[index] & ~(other_map[index]); @@ -330,8 +379,8 @@ void BitMap::set_intersection(BitMap other) { assert(size() == other.size(), "must have same size"); - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { dest_map[index] = dest_map[index] & other_map[index]; @@ -339,11 +388,26 @@ } +void BitMap::set_intersection_at_offset(BitMap other, idx_t offset) { + assert(other.size() >= offset, "offset not in range"); + assert(other.size() - offset >= size(), "other not large enough"); + // XXX Ideally, we would remove this restriction. 
+ guarantee((offset % (sizeof(bm_word_t) * BitsPerByte)) == 0, + "Only handle aligned cases so far."); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); + idx_t offset_word_ind = word_index(offset); + idx_t size = size_in_words(); + for (idx_t index = 0; index < size; index++) { + dest_map[index] = dest_map[index] & other_map[offset_word_ind + index]; + } +} + bool BitMap::set_union_with_result(BitMap other) { assert(size() == other.size(), "must have same size"); bool changed = false; - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { idx_t temp = map(index) | other_map[index]; @@ -357,11 +421,11 @@ bool BitMap::set_difference_with_result(BitMap other) { assert(size() == other.size(), "must have same size"); bool changed = false; - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { - idx_t temp = dest_map[index] & ~(other_map[index]); + bm_word_t temp = dest_map[index] & ~(other_map[index]); changed = changed || (temp != dest_map[index]); dest_map[index] = temp; } @@ -372,12 +436,12 @@ bool BitMap::set_intersection_with_result(BitMap other) { assert(size() == other.size(), "must have same size"); bool changed = false; - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { - idx_t orig = dest_map[index]; - idx_t temp = orig & other_map[index]; + bm_word_t orig = dest_map[index]; + bm_word_t temp = orig & other_map[index]; changed = changed || (temp != orig); dest_map[index] = temp; } @@ -387,8 +451,8 @@ void BitMap::set_from(BitMap other) { assert(size() == other.size(), "must have same size"); - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { dest_map[index] = other_map[index]; @@ -398,8 +462,8 @@ bool BitMap::is_same(BitMap other) { assert(size() == other.size(), "must have same size"); - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { if (dest_map[index] != other_map[index]) return false; @@ -408,24 +472,24 @@ } bool BitMap::is_full() const { - uintptr_t* word = map(); + bm_word_t* word = map(); idx_t rest = size(); for (; rest >= (idx_t) BitsPerWord; rest -= BitsPerWord) { - if (*word != (uintptr_t) AllBits) return false; + if (*word != (bm_word_t) AllBits) return false; word++; } - return rest == 0 || (*word | ~right_n_bits((int)rest)) == (uintptr_t) AllBits; + return rest == 0 || (*word | ~right_n_bits((int)rest)) == (bm_word_t) AllBits; } bool BitMap::is_empty() const { - uintptr_t* word = map(); + bm_word_t* word = map(); idx_t rest = size(); for (; rest >= (idx_t) BitsPerWord; rest -= BitsPerWord) { - if (*word != (uintptr_t) NoBits) return false; + if (*word != (bm_word_t) NoBits) return false; word++; } - return rest == 0 || (*word & right_n_bits((int)rest)) == (uintptr_t) NoBits; + return rest == 0 || (*word & right_n_bits((int)rest)) == (bm_word_t) NoBits; } void BitMap::clear_large() { @@ -436,7 
+500,7 @@ // then modifications in and to the left of the _bit_ being // currently sampled will not be seen. Note also that the // interval [leftOffset, rightOffset) is right open. -void BitMap::iterate(BitMapClosure* blk, idx_t leftOffset, idx_t rightOffset) { +bool BitMap::iterate(BitMapClosure* blk, idx_t leftOffset, idx_t rightOffset) { verify_range(leftOffset, rightOffset); idx_t startIndex = word_index(leftOffset); @@ -445,106 +509,71 @@ offset < rightOffset && index < endIndex; offset = (++index) << LogBitsPerWord) { idx_t rest = map(index) >> (offset & (BitsPerWord - 1)); - for (; offset < rightOffset && rest != (uintptr_t)NoBits; offset++) { + for (; offset < rightOffset && rest != (bm_word_t)NoBits; offset++) { if (rest & 1) { - blk->do_bit(offset); + if (!blk->do_bit(offset)) return false; // resample at each closure application // (see, for instance, CMS bug 4525989) rest = map(index) >> (offset & (BitsPerWord -1)); - // XXX debugging: remove - // The following assertion assumes that closure application - // doesn't clear bits (may not be true in general, e.g. G1). - assert(rest & 1, - "incorrect shift or closure application can clear bits?"); } rest = rest >> 1; } } + return true; +} + +BitMap::idx_t* BitMap::_pop_count_table = NULL; + +void BitMap::init_pop_count_table() { + if (_pop_count_table == NULL) { + BitMap::idx_t *table = NEW_C_HEAP_ARRAY(idx_t, 256); + for (uint i = 0; i < 256; i++) { + table[i] = num_set_bits(i); + } + + intptr_t res = Atomic::cmpxchg_ptr((intptr_t) table, + (intptr_t*) &_pop_count_table, + (intptr_t) NULL_WORD); + if (res != NULL_WORD) { + guarantee( _pop_count_table == (void*) res, "invariant" ); + FREE_C_HEAP_ARRAY(bm_word_t, table); + } + } } -BitMap::idx_t BitMap::get_next_one_offset(idx_t l_offset, - idx_t r_offset) const { - assert(l_offset <= size(), "BitMap index out of bounds"); - assert(r_offset <= size(), "BitMap index out of bounds"); - assert(l_offset <= r_offset, "l_offset > r_offset ?"); - - if (l_offset == r_offset) { - return l_offset; - } - idx_t index = word_index(l_offset); - idx_t r_index = word_index(r_offset-1) + 1; - idx_t res_offset = l_offset; +BitMap::idx_t BitMap::num_set_bits(bm_word_t w) { + idx_t bits = 0; - // check bits including and to the _left_ of offset's position - idx_t pos = bit_in_word(res_offset); - idx_t res = map(index) >> pos; - if (res != (uintptr_t)NoBits) { - // find the position of the 1-bit - for (; !(res & 1); res_offset++) { - res = res >> 1; + while (w != 0) { + while ((w & 1) == 0) { + w >>= 1; } - assert(res_offset >= l_offset, "just checking"); - return MIN2(res_offset, r_offset); + bits++; + w >>= 1; } - // skip over all word length 0-bit runs - for (index++; index < r_index; index++) { - res = map(index); - if (res != (uintptr_t)NoBits) { - // found a 1, return the offset - for (res_offset = index << LogBitsPerWord; !(res & 1); - res_offset++) { - res = res >> 1; - } - assert(res & 1, "tautology; see loop condition"); - assert(res_offset >= l_offset, "just checking"); - return MIN2(res_offset, r_offset); - } - } - return r_offset; + return bits; } -BitMap::idx_t BitMap::get_next_zero_offset(idx_t l_offset, - idx_t r_offset) const { - assert(l_offset <= size(), "BitMap index out of bounds"); - assert(r_offset <= size(), "BitMap index out of bounds"); - assert(l_offset <= r_offset, "l_offset > r_offset ?"); - - if (l_offset == r_offset) { - return l_offset; - } - idx_t index = word_index(l_offset); - idx_t r_index = word_index(r_offset-1) + 1; - idx_t res_offset = l_offset; - - // check bits 
including and to the _left_ of offset's position - idx_t pos = res_offset & (BitsPerWord - 1); - idx_t res = (map(index) >> pos) | left_n_bits((int)pos); +BitMap::idx_t BitMap::num_set_bits_from_table(unsigned char c) { + assert(_pop_count_table != NULL, "precondition"); + return _pop_count_table[c]; +} - if (res != (uintptr_t)AllBits) { - // find the position of the 0-bit - for (; res & 1; res_offset++) { - res = res >> 1; - } - assert(res_offset >= l_offset, "just checking"); - return MIN2(res_offset, r_offset); - } - // skip over all word length 1-bit runs - for (index++; index < r_index; index++) { - res = map(index); - if (res != (uintptr_t)AllBits) { - // found a 0, return the offset - for (res_offset = index << LogBitsPerWord; res & 1; - res_offset++) { - res = res >> 1; - } - assert(!(res & 1), "tautology; see loop condition"); - assert(res_offset >= l_offset, "just checking"); - return MIN2(res_offset, r_offset); +BitMap::idx_t BitMap::count_one_bits() const { + init_pop_count_table(); // If necessary. + idx_t sum = 0; + typedef unsigned char uchar; + for (idx_t i = 0; i < size_in_words(); i++) { + bm_word_t w = map()[i]; + for (size_t j = 0; j < sizeof(bm_word_t); j++) { + sum += num_set_bits_from_table(uchar(w & 255)); + w >>= 8; } } - return r_offset; + return sum; } + #ifndef PRODUCT void BitMap::print_on(outputStream* st) const { @@ -558,7 +587,7 @@ #endif -BitMap2D::BitMap2D(uintptr_t* map, idx_t size_in_slots, idx_t bits_per_slot) +BitMap2D::BitMap2D(bm_word_t* map, idx_t size_in_slots, idx_t bits_per_slot) : _bits_per_slot(bits_per_slot) , _map(map, size_in_slots * bits_per_slot) { diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/bitMap.hpp --- a/src/share/vm/utilities/bitMap.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/bitMap.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -22,25 +22,19 @@ * */ -// Closure for iterating over BitMaps +// Forward decl; +class BitMapClosure; -class BitMapClosure VALUE_OBJ_CLASS_SPEC { - public: - // Callback when bit in map is set - virtual void do_bit(size_t offset) = 0; -}; - - -// Operations for bitmaps represented as arrays of unsigned 32- or 64-bit -// integers (uintptr_t). -// -// Bit offsets are numbered from 0 to size-1 +// Operations for bitmaps represented as arrays of unsigned integers. +// Bit offsets are numbered from 0 to size-1. class BitMap VALUE_OBJ_CLASS_SPEC { friend class BitMap2D; public: typedef size_t idx_t; // Type used for bit and word indices. + typedef uintptr_t bm_word_t; // Element type of array that represents + // the bitmap. // Hints for range sizes. typedef enum { @@ -48,8 +42,8 @@ } RangeSizeHint; private: - idx_t* _map; // First word in bitmap - idx_t _size; // Size of bitmap (in bits) + bm_word_t* _map; // First word in bitmap + idx_t _size; // Size of bitmap (in bits) // Puts the given value at the given offset, using resize() to size // the bitmap appropriately if needed using factor-of-two expansion. @@ -62,7 +56,7 @@ // Return a mask that will select the specified bit, when applied to the word // containing the bit. - static idx_t bit_mask(idx_t bit) { return (idx_t)1 << bit_in_word(bit); } + static bm_word_t bit_mask(idx_t bit) { return (bm_word_t)1 << bit_in_word(bit); } // Return the index of the word containing the specified bit. static idx_t word_index(idx_t bit) { return bit >> LogBitsPerWord; } @@ -71,66 +65,68 @@ static idx_t bit_index(idx_t word) { return word << LogBitsPerWord; } // Return the array of bitmap words, or a specific word from it. 
- idx_t* map() const { return _map; } - idx_t map(idx_t word) const { return _map[word]; } + bm_word_t* map() const { return _map; } + bm_word_t map(idx_t word) const { return _map[word]; } // Return a pointer to the word containing the specified bit. - idx_t* word_addr(idx_t bit) const { return map() + word_index(bit); } + bm_word_t* word_addr(idx_t bit) const { return map() + word_index(bit); } // Set a word to a specified value or to all ones; clear a word. - void set_word (idx_t word, idx_t val) { _map[word] = val; } + void set_word (idx_t word, bm_word_t val) { _map[word] = val; } void set_word (idx_t word) { set_word(word, ~(uintptr_t)0); } void clear_word(idx_t word) { _map[word] = 0; } // Utilities for ranges of bits. Ranges are half-open [beg, end). // Ranges within a single word. - inline idx_t inverted_bit_mask_for_range(idx_t beg, idx_t end) const; - inline void set_range_within_word (idx_t beg, idx_t end); - inline void clear_range_within_word (idx_t beg, idx_t end); - inline void par_put_range_within_word (idx_t beg, idx_t end, bool value); + bm_word_t inverted_bit_mask_for_range(idx_t beg, idx_t end) const; + void set_range_within_word (idx_t beg, idx_t end); + void clear_range_within_word (idx_t beg, idx_t end); + void par_put_range_within_word (idx_t beg, idx_t end, bool value); // Ranges spanning entire words. - inline void set_range_of_words (idx_t beg, idx_t end); - inline void clear_range_of_words (idx_t beg, idx_t end); - inline void set_large_range_of_words (idx_t beg, idx_t end); - inline void clear_large_range_of_words (idx_t beg, idx_t end); + void set_range_of_words (idx_t beg, idx_t end); + void clear_range_of_words (idx_t beg, idx_t end); + void set_large_range_of_words (idx_t beg, idx_t end); + void clear_large_range_of_words (idx_t beg, idx_t end); // The index of the first full word in a range. - inline idx_t word_index_round_up(idx_t bit) const; + idx_t word_index_round_up(idx_t bit) const; // Verification, statistics. - void verify_index(idx_t index) const { - assert(index < _size, "BitMap index out of bounds"); - } + void verify_index(idx_t index) const; + void verify_range(idx_t beg_index, idx_t end_index) const; - void verify_range(idx_t beg_index, idx_t end_index) const { -#ifdef ASSERT - assert(beg_index <= end_index, "BitMap range error"); - // Note that [0,0) and [size,size) are both valid ranges. - if (end_index != _size) verify_index(end_index); -#endif - } + static idx_t* _pop_count_table; + static void init_pop_count_table(); + static idx_t num_set_bits(bm_word_t w); + static idx_t num_set_bits_from_table(unsigned char c); public: // Constructs a bitmap with no map, and size 0. BitMap() : _map(NULL), _size(0) {} - // Construction - BitMap(idx_t* map, idx_t size_in_bits); + // Constructs a bitmap with the given map and size. + BitMap(bm_word_t* map, idx_t size_in_bits); - // Allocates necessary data structure in resource area - BitMap(idx_t size_in_bits); + // Constructs an empty bitmap of the given size (that is, this clears the + // new bitmap). Allocates the map array in resource area if + // "in_resource_area" is true, else in the C heap. + BitMap(idx_t size_in_bits, bool in_resource_area = true); - void set_map(idx_t* map) { _map = map; } + // Set the map and size. + void set_map(bm_word_t* map) { _map = map; } void set_size(idx_t size_in_bits) { _size = size_in_bits; } - // Allocates necessary data structure in resource area. 
+ // Allocates necessary data structure, either in the resource area + // or in the C heap, as indicated by "in_resource_area." // Preserves state currently in bit map by copying data. // Zeros any newly-addressable bits. - // Does not perform any frees (i.e., of current _map). - void resize(idx_t size_in_bits); + // If "in_resource_area" is false, frees the current map. + // (Note that this assumes that all calls to "resize" on the same BitMap + // use the same value for "in_resource_area".) + void resize(idx_t size_in_bits, bool in_resource_area = true); // Accessing idx_t size() const { return _size; } @@ -157,11 +153,11 @@ // Set or clear the specified bit. inline void set_bit(idx_t bit); - inline void clear_bit(idx_t bit); + void clear_bit(idx_t bit); // Atomically set or clear the specified bit. - inline bool par_set_bit(idx_t bit); - inline bool par_clear_bit(idx_t bit); + bool par_set_bit(idx_t bit); + bool par_clear_bit(idx_t bit); // Put the given value at the given offset. The parallel version // will CAS the value into the bitmap and is quite a bit slower. @@ -183,23 +179,61 @@ // Update a range of bits, using a hint about the size. Currently only // inlines the predominant case of a 1-bit range. Works best when hint is a // compile-time constant. - inline void set_range(idx_t beg, idx_t end, RangeSizeHint hint); - inline void clear_range(idx_t beg, idx_t end, RangeSizeHint hint); - inline void par_set_range(idx_t beg, idx_t end, RangeSizeHint hint); - inline void par_clear_range (idx_t beg, idx_t end, RangeSizeHint hint); + void set_range(idx_t beg, idx_t end, RangeSizeHint hint); + void clear_range(idx_t beg, idx_t end, RangeSizeHint hint); + void par_set_range(idx_t beg, idx_t end, RangeSizeHint hint); + void par_clear_range (idx_t beg, idx_t end, RangeSizeHint hint); + + // It performs the union operation between subsets of equal length + // of two bitmaps (the target bitmap of the method and the + // from_bitmap) and stores the result to the target bitmap. The + // from_start_index represents the first bit index of the subrange + // of the from_bitmap. The to_start_index is the equivalent of the + // target bitmap. Both indexes should be word-aligned, i.e. they + // should correspond to the first bit on a bitmap word (it's up to + // the caller to ensure this; the method does check it). The length + // of the subset is specified with word_num and it is in number of + // bitmap words. The caller should ensure that this is at least 2 + // (smaller ranges are not support to save extra checks). Again, + // this is checked in the method. + // + // Atomicity concerns: it is assumed that any contention on the + // target bitmap with other threads will happen on the first and + // last words; the ones in between will be "owned" exclusively by + // the calling thread and, in fact, they will already be 0. So, the + // method performs a CAS on the first word, copies the next + // word_num-2 words, and finally performs a CAS on the last word. + void mostly_disjoint_range_union(BitMap* from_bitmap, + idx_t from_start_index, + idx_t to_start_index, + size_t word_num); + // Clearing - void clear(); void clear_large(); + inline void clear(); - // Iteration support - void iterate(BitMapClosure* blk, idx_t leftIndex, idx_t rightIndex); - inline void iterate(BitMapClosure* blk) { + // Iteration support. Returns "true" if the iteration completed, false + // if the iteration terminated early (because the closure "blk" returned + // false). 
+ bool iterate(BitMapClosure* blk, idx_t leftIndex, idx_t rightIndex); + bool iterate(BitMapClosure* blk) { // call the version that takes an interval - iterate(blk, 0, size()); + return iterate(blk, 0, size()); } - // Looking for 1's and 0's to the "right" + // Looking for 1's and 0's at indices equal to or greater than "l_index", + // stopping if none has been found before "r_index", and returning + // "r_index" (which must be at most "size") in that case. + idx_t get_next_one_offset_inline (idx_t l_index, idx_t r_index) const; + idx_t get_next_zero_offset_inline(idx_t l_index, idx_t r_index) const; + + // Like "get_next_one_offset_inline", except requires that "r_index" is + // aligned to bitsizeof(bm_word_t). + idx_t get_next_one_offset_inline_aligned_right(idx_t l_index, + idx_t r_index) const; + + // Non-inline versionsof the above. idx_t get_next_one_offset (idx_t l_index, idx_t r_index) const; idx_t get_next_zero_offset(idx_t l_index, idx_t r_index) const; @@ -210,12 +244,8 @@ return get_next_zero_offset(offset, size()); } - - - // Find the next one bit in the range [beg_bit, end_bit), or return end_bit if - // no one bit is found. Equivalent to get_next_one_offset(), but inline for - // use in performance-critical code. - inline idx_t find_next_one_bit(idx_t beg_bit, idx_t end_bit) const; + // Returns the number of bits set in the bitmap. + idx_t count_one_bits() const; // Set operations. void set_union(BitMap bits); @@ -232,6 +262,15 @@ bool set_difference_with_result(BitMap bits); bool set_intersection_with_result(BitMap bits); + // Requires the submap of "bits" starting at offset to be at least as + // large as "this". Modifies "this" to be the intersection of its + // current contents and the submap of "bits" starting at "offset" of the + // same length as "this." + // (For expedience, currently requires the offset to be aligned to the + // bitsize of a uintptr_t. This should go away in the future though it + // will probably remain a good case to optimize.) + void set_intersection_at_offset(BitMap bits, idx_t offset); + void set_from(BitMap bits); bool is_same(BitMap bits); @@ -248,58 +287,13 @@ #endif }; -inline void BitMap::set_bit(idx_t bit) { - verify_index(bit); - *word_addr(bit) |= bit_mask(bit); -} - -inline void BitMap::clear_bit(idx_t bit) { - verify_index(bit); - *word_addr(bit) &= ~bit_mask(bit); -} - -inline void BitMap::set_range(idx_t beg, idx_t end, RangeSizeHint hint) { - if (hint == small_range && end - beg == 1) { - set_bit(beg); - } else { - if (hint == large_range) { - set_large_range(beg, end); - } else { - set_range(beg, end); - } - } -} - -inline void BitMap::clear_range(idx_t beg, idx_t end, RangeSizeHint hint) { - if (hint == small_range && end - beg == 1) { - clear_bit(beg); - } else { - if (hint == large_range) { - clear_large_range(beg, end); - } else { - clear_range(beg, end); - } - } -} - -inline void BitMap::par_set_range(idx_t beg, idx_t end, RangeSizeHint hint) { - if (hint == small_range && end - beg == 1) { - par_at_put(beg, true); - } else { - if (hint == large_range) { - par_at_put_large_range(beg, end, true); - } else { - par_at_put_range(beg, end, true); - } - } -} - // Convenience class wrapping BitMap which provides multiple bits per slot. class BitMap2D VALUE_OBJ_CLASS_SPEC { public: - typedef size_t idx_t; // Type used for bit and word indices. - + typedef BitMap::idx_t idx_t; // Type used for bit and word indices. + typedef BitMap::bm_word_t bm_word_t; // Element type of array that + // represents the bitmap. 
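// Illustrative sketch (not code from this changeset): count_one_bits(),
// declared above and implemented in bitMap.cpp, counts set bits with a lazily
// built 256-entry per-byte population-count table. The self-contained stand-in
// below shows the same scheme in plain C++ with no HotSpot types; the real
// code allocates the table in the C heap and publishes it with a CAS so that
// racing initializers are benign.

#include <stddef.h>
#include <stdint.h>

static unsigned char demo_pop_count_table[256];

static void demo_init_pop_count_table() {
  for (unsigned i = 0; i < 256; i++) {
    unsigned v = i, bits = 0;
    while (v != 0) { bits += (v & 1); v >>= 1; }   // count bits of one byte
    demo_pop_count_table[i] = (unsigned char) bits;
  }
}

// Sum per-byte table lookups over every word of the bitmap.
static size_t demo_count_one_bits(const uintptr_t* map, size_t size_in_words) {
  size_t sum = 0;
  for (size_t i = 0; i < size_in_words; i++) {
    uintptr_t w = map[i];
    for (size_t j = 0; j < sizeof(uintptr_t); j++) {
      sum += demo_pop_count_table[(unsigned char)(w & 0xff)];
      w >>= 8;
    }
  }
  return sum;
}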
private: BitMap _map; idx_t _bits_per_slot; @@ -314,7 +308,7 @@ public: // Construction. bits_per_slot must be greater than 0. - BitMap2D(uintptr_t* map, idx_t size_in_slots, idx_t bits_per_slot); + BitMap2D(bm_word_t* map, idx_t size_in_slots, idx_t bits_per_slot); // Allocates necessary data structure in resource area. bits_per_slot must be greater than 0. BitMap2D(idx_t size_in_slots, idx_t bits_per_slot); @@ -359,38 +353,14 @@ _map.at_put_grow(bit_index(slot_index, bit_within_slot_index), value); } - void clear() { - _map.clear(); - } + void clear(); }; - - -inline void BitMap::set_range_of_words(idx_t beg, idx_t end) { - uintptr_t* map = _map; - for (idx_t i = beg; i < end; ++i) map[i] = ~(uintptr_t)0; -} - - -inline void BitMap::clear_range_of_words(idx_t beg, idx_t end) { - uintptr_t* map = _map; - for (idx_t i = beg; i < end; ++i) map[i] = 0; -} - +// Closure for iterating over BitMaps -inline void BitMap::clear() { - clear_range_of_words(0, size_in_words()); -} - - -inline void BitMap::par_clear_range(idx_t beg, idx_t end, RangeSizeHint hint) { - if (hint == small_range && end - beg == 1) { - par_at_put(beg, false); - } else { - if (hint == large_range) { - par_at_put_large_range(beg, end, false); - } else { - par_at_put_range(beg, end, false); - } - } -} +class BitMapClosure VALUE_OBJ_CLASS_SPEC { + public: + // Callback when bit in map is set. Should normally return "true"; + // return of false indicates that the bitmap iteration should terminate. + virtual bool do_bit(BitMap::idx_t offset) = 0; +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/bitMap.inline.hpp --- a/src/share/vm/utilities/bitMap.inline.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/bitMap.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -22,6 +22,17 @@ * */ + +inline void BitMap::set_bit(idx_t bit) { + verify_index(bit); + *word_addr(bit) |= bit_mask(bit); +} + +inline void BitMap::clear_bit(idx_t bit) { + verify_index(bit); + *word_addr(bit) &= ~bit_mask(bit); +} + inline bool BitMap::par_set_bit(idx_t bit) { verify_index(bit); volatile idx_t* const addr = word_addr(bit); @@ -64,42 +75,236 @@ } while (true); } -inline BitMap::idx_t -BitMap::find_next_one_bit(idx_t beg_bit, idx_t end_bit) const -{ - verify_range(beg_bit, end_bit); - assert(bit_in_word(end_bit) == 0, "end_bit not word-aligned"); +inline void BitMap::set_range(idx_t beg, idx_t end, RangeSizeHint hint) { + if (hint == small_range && end - beg == 1) { + set_bit(beg); + } else { + if (hint == large_range) { + set_large_range(beg, end); + } else { + set_range(beg, end); + } + } +} + +inline void BitMap::clear_range(idx_t beg, idx_t end, RangeSizeHint hint) { + if (hint == small_range && end - beg == 1) { + clear_bit(beg); + } else { + if (hint == large_range) { + clear_large_range(beg, end); + } else { + clear_range(beg, end); + } + } +} + +inline void BitMap::par_set_range(idx_t beg, idx_t end, RangeSizeHint hint) { + if (hint == small_range && end - beg == 1) { + par_at_put(beg, true); + } else { + if (hint == large_range) { + par_at_put_large_range(beg, end, true); + } else { + par_at_put_range(beg, end, true); + } + } +} - if (beg_bit == end_bit) { - return beg_bit; - } +inline void BitMap::set_range_of_words(idx_t beg, idx_t end) { + bm_word_t* map = _map; + for (idx_t i = beg; i < end; ++i) map[i] = ~(uintptr_t)0; +} + + +inline void BitMap::clear_range_of_words(idx_t beg, idx_t end) { + bm_word_t* map = _map; + for (idx_t i = beg; i < end; ++i) map[i] = 0; +} + + +inline void BitMap::clear() { + 
clear_range_of_words(0, size_in_words()); +} + - idx_t index = word_index(beg_bit); - idx_t r_index = word_index(end_bit); - idx_t res_bit = beg_bit; +inline void BitMap::par_clear_range(idx_t beg, idx_t end, RangeSizeHint hint) { + if (hint == small_range && end - beg == 1) { + par_at_put(beg, false); + } else { + if (hint == large_range) { + par_at_put_large_range(beg, end, false); + } else { + par_at_put_range(beg, end, false); + } + } +} + +inline BitMap::idx_t +BitMap::get_next_one_offset_inline(idx_t l_offset, idx_t r_offset) const { + assert(l_offset <= size(), "BitMap index out of bounds"); + assert(r_offset <= size(), "BitMap index out of bounds"); + assert(l_offset <= r_offset, "l_offset > r_offset ?"); + + if (l_offset == r_offset) { + return l_offset; + } + idx_t index = word_index(l_offset); + idx_t r_index = word_index(r_offset-1) + 1; + idx_t res_offset = l_offset; // check bits including and to the _left_ of offset's position - idx_t res = map(index) >> bit_in_word(res_bit); - if (res != (uintptr_t) NoBits) { + idx_t pos = bit_in_word(res_offset); + idx_t res = map(index) >> pos; + if (res != (uintptr_t)NoBits) { // find the position of the 1-bit - for (; !(res & 1); res_bit++) { + for (; !(res & 1); res_offset++) { res = res >> 1; } - assert(res_bit >= beg_bit && res_bit < end_bit, "just checking"); - return res_bit; + assert(res_offset >= l_offset && + res_offset < r_offset, "just checking"); + return MIN2(res_offset, r_offset); } // skip over all word length 0-bit runs for (index++; index < r_index; index++) { res = map(index); - if (res != (uintptr_t) NoBits) { + if (res != (uintptr_t)NoBits) { // found a 1, return the offset - for (res_bit = bit_index(index); !(res & 1); res_bit++) { + for (res_offset = bit_index(index); !(res & 1); res_offset++) { res = res >> 1; } assert(res & 1, "tautology; see loop condition"); - assert(res_bit >= beg_bit && res_bit < end_bit, "just checking"); - return res_bit; + assert(res_offset >= l_offset, "just checking"); + return MIN2(res_offset, r_offset); + } + } + return r_offset; +} + +inline BitMap::idx_t +BitMap::get_next_zero_offset_inline(idx_t l_offset, idx_t r_offset) const { + assert(l_offset <= size(), "BitMap index out of bounds"); + assert(r_offset <= size(), "BitMap index out of bounds"); + assert(l_offset <= r_offset, "l_offset > r_offset ?"); + + if (l_offset == r_offset) { + return l_offset; + } + idx_t index = word_index(l_offset); + idx_t r_index = word_index(r_offset-1) + 1; + idx_t res_offset = l_offset; + + // check bits including and to the _left_ of offset's position + idx_t pos = res_offset & (BitsPerWord - 1); + idx_t res = (map(index) >> pos) | left_n_bits((int)pos); + + if (res != (uintptr_t)AllBits) { + // find the position of the 0-bit + for (; res & 1; res_offset++) { + res = res >> 1; + } + assert(res_offset >= l_offset, "just checking"); + return MIN2(res_offset, r_offset); + } + // skip over all word length 1-bit runs + for (index++; index < r_index; index++) { + res = map(index); + if (res != (uintptr_t)AllBits) { + // found a 0, return the offset + for (res_offset = index << LogBitsPerWord; res & 1; + res_offset++) { + res = res >> 1; + } + assert(!(res & 1), "tautology; see loop condition"); + assert(res_offset >= l_offset, "just checking"); + return MIN2(res_offset, r_offset); } } - return end_bit; + return r_offset; +} + +inline BitMap::idx_t +BitMap::get_next_one_offset_inline_aligned_right(idx_t l_offset, + idx_t r_offset) const +{ + verify_range(l_offset, r_offset); + assert(bit_in_word(r_offset) 
== 0, "r_offset not word-aligned"); + + if (l_offset == r_offset) { + return l_offset; + } + idx_t index = word_index(l_offset); + idx_t r_index = word_index(r_offset); + idx_t res_offset = l_offset; + + // check bits including and to the _left_ of offset's position + idx_t res = map(index) >> bit_in_word(res_offset); + if (res != (uintptr_t)NoBits) { + // find the position of the 1-bit + for (; !(res & 1); res_offset++) { + res = res >> 1; + } + assert(res_offset >= l_offset && + res_offset < r_offset, "just checking"); + return res_offset; + } + // skip over all word length 0-bit runs + for (index++; index < r_index; index++) { + res = map(index); + if (res != (uintptr_t)NoBits) { + // found a 1, return the offset + for (res_offset = bit_index(index); !(res & 1); res_offset++) { + res = res >> 1; + } + assert(res & 1, "tautology; see loop condition"); + assert(res_offset >= l_offset && res_offset < r_offset, "just checking"); + return res_offset; + } + } + return r_offset; } + + +// Returns a bit mask for a range of bits [beg, end) within a single word. Each +// bit in the mask is 0 if the bit is in the range, 1 if not in the range. The +// returned mask can be used directly to clear the range, or inverted to set the +// range. Note: end must not be 0. +inline BitMap::bm_word_t +BitMap::inverted_bit_mask_for_range(idx_t beg, idx_t end) const { + assert(end != 0, "does not work when end == 0"); + assert(beg == end || word_index(beg) == word_index(end - 1), + "must be a single-word range"); + bm_word_t mask = bit_mask(beg) - 1; // low (right) bits + if (bit_in_word(end) != 0) { + mask |= ~(bit_mask(end) - 1); // high (left) bits + } + return mask; +} + +inline void BitMap::set_large_range_of_words(idx_t beg, idx_t end) { + memset(_map + beg, ~(unsigned char)0, (end - beg) * sizeof(uintptr_t)); +} + +inline void BitMap::clear_large_range_of_words(idx_t beg, idx_t end) { + memset(_map + beg, 0, (end - beg) * sizeof(uintptr_t)); +} + +inline BitMap::idx_t BitMap::word_index_round_up(idx_t bit) const { + idx_t bit_rounded_up = bit + (BitsPerWord - 1); + // Check for integer arithmetic overflow. + return bit_rounded_up > bit ? 
word_index(bit_rounded_up) : size_in_words(); +} + +inline BitMap::idx_t BitMap::get_next_one_offset(idx_t l_offset, + idx_t r_offset) const { + return get_next_one_offset_inline(l_offset, r_offset); +} + +inline BitMap::idx_t BitMap::get_next_zero_offset(idx_t l_offset, + idx_t r_offset) const { + return get_next_zero_offset_inline(l_offset, r_offset); +} + +inline void BitMap2D::clear() { + _map.clear(); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/debug.cpp --- a/src/share/vm/utilities/debug.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/debug.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -666,7 +666,7 @@ oop target; void do_oop(oop* o) { if (o != NULL && *o == target) { - tty->print_cr("0x%08x", o); + tty->print_cr(INTPTR_FORMAT, o); } } void do_oop(narrowOop* o) { ShouldNotReachHere(); } @@ -685,13 +685,13 @@ static void findref(intptr_t x) { - GenCollectedHeap *gch = GenCollectedHeap::heap(); + CollectedHeap *ch = Universe::heap(); LookForRefInGenClosure lookFor; lookFor.target = (oop) x; LookForRefInObjectClosure look_in_object((oop) x); tty->print_cr("Searching heap:"); - gch->object_iterate(&look_in_object); + ch->object_iterate(&look_in_object); tty->print_cr("Searching strong roots:"); Universe::oops_do(&lookFor, false); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/intHisto.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/utilities/intHisto.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,64 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_intHisto.cpp.incl" + +IntHistogram::IntHistogram(int est, int max) : _max(max), _tot(0) { + assert(0 <= est && est <= max, "Preconditions"); + _elements = new (ResourceObj::C_HEAP) GrowableArray<int>(est, true); + guarantee(_elements != NULL, "alloc failure"); +} + +void IntHistogram::add_entry(int outcome) { + if (outcome > _max) outcome = _max; + int new_count = _elements->at_grow(outcome) + 1; + _elements->at_put(outcome, new_count); + _tot++; +} + +int IntHistogram::entries_for_outcome(int outcome) { + return _elements->at_grow(outcome); +} + +void IntHistogram::print_on(outputStream* st) const { + double tot_d = (double)_tot; + st->print_cr("Outcome # of occurrences %% of occurrences"); + st->print_cr("-----------------------------------------------"); + for (int i=0; i < _elements->length()-2; i++) { + int cnt = _elements->at(i); + if (cnt != 0) { + st->print_cr("%7d %10d %8.4f", + i, cnt, (double)cnt/tot_d); + } + } + // Does it have any max entries? + if (_elements->length()-1 == _max) { + int cnt = _elements->at(_max); + st->print_cr(">= %4d %10d %8.4f", + _max, cnt, (double)cnt/tot_d); + } + st->print_cr("-----------------------------------------------"); + st->print_cr(" All %10d %8.4f", _tot, 1.0); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/intHisto.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/utilities/intHisto.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,70 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// This class implements a simple histogram. + +// A histogram summarizes a series of "measurements", each of which is +// assumed (required in this implementation) to have an outcome that is a +// non-negative integer. The histogram efficiently maps measurement outcomes +// to the number of measurements that had that outcome. + +// To print the results, invoke print_on() on your IntHistogram*. + +// Note: there is already an existing "Histogram" class, in file +// histogram.{hpp,cpp}, but to my mind that's not a histogram, it's a table +// mapping strings to counts. To be a histogram (IMHO) it needs to map +// numbers (in fact, integers) to number of occurrences of that number. + +// ysr: (i am not sure i agree with the above note.) i suspect we want to have a +// histogram template that will map an arbitrary type (with a defined order +// relation) to a count.
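A hypothetical usage sketch (the reporting function and its inputs are illustrative, not part of this changeset): summarizing per-attempt retry counts, with every outcome of 16 or more bundled into the final bucket.

static void report_retries(outputStream* st, const int* retries, int n) {
  IntHistogram histo(8 /* est */, 16 /* max */);
  for (int i = 0; i < n; i++) {
    histo.add_entry(retries[i]);   // outcomes above max are folded into max
  }
  st->print_cr("%d attempts in total", histo.total_entries());
  histo.print_on(st);
}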
+ + +class IntHistogram : public CHeapObj { + protected: + int _max; + int _tot; + GrowableArray<int>* _elements; + +public: + // Create a new, empty table. "est" is an estimate of the maximum outcome + // that will be added, and "max" is an outcome such that all outcomes at + // least that large will be bundled with it. + IntHistogram(int est, int max); + // Add a measurement with the given outcome to the sequence. + void add_entry(int outcome); + // Return the number of entries recorded so far with the given outcome. + int entries_for_outcome(int outcome); + // Return the total number of entries recorded so far. + int total_entries() { return _tot; } + // Return the number of entries recorded so far with the given outcome as + // a fraction of the total number recorded so far. + double fraction_for_outcome(int outcome) { + return + (double)entries_for_outcome(outcome)/ + (double)total_entries(); + } + // Print the histogram on the given output stream. + void print_on(outputStream* st) const; +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/numberSeq.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/utilities/numberSeq.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,243 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions.
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_numberSeq.cpp.incl" + +AbsSeq::AbsSeq(double alpha) : + _num(0), _sum(0.0), _sum_of_squares(0.0), + _davg(0.0), _dvariance(0.0), _alpha(alpha) { +} + +void AbsSeq::add(double val) { + if (_num == 0) { + // if the sequence is empty, the davg is the same as the value + _davg = val; + // and the variance is 0 + _dvariance = 0.0; + } else { + // otherwise, calculate both + _davg = (1.0 - _alpha) * val + _alpha * _davg; + double diff = val - _davg; + _dvariance = (1.0 - _alpha) * diff * diff + _alpha * _dvariance; + } +} + +double AbsSeq::avg() const { + if (_num == 0) + return 0.0; + else + return _sum / total(); +} + +double AbsSeq::variance() const { + if (_num <= 1) + return 0.0; + + double x_bar = avg(); + double result = _sum_of_squares / total() - x_bar * x_bar; + if (result < 0.0) { + // due to loss-of-precision errors, the variance might be negative + // by a small bit + + // guarantee(-0.1 < result && result < 0.0, + // "if variance is negative, it should be very small"); + result = 0.0; + } + return result; +} + +double AbsSeq::sd() const { + double var = variance(); + guarantee( var >= 0.0, "variance should not be negative" ); + return sqrt(var); +} + +double AbsSeq::davg() const { + return _davg; +} + +double AbsSeq::dvariance() const { + if (_num <= 1) + return 0.0; + + double result = _dvariance; + if (result < 0.0) { + // due to loss-of-precision errors, the variance might be negative + // by a small bit + + guarantee(-0.1 < result && result < 0.0, + "if variance is negative, it should be very small"); + result = 0.0; + } + return result; +} + +double AbsSeq::dsd() const { + double var = dvariance(); + guarantee( var >= 0.0, "variance should not be negative" ); + return sqrt(var); +} + +NumberSeq::NumberSeq(double alpha) : + AbsSeq(alpha), _maximum(0.0), _last(0.0) { +} + +bool NumberSeq::check_nums(NumberSeq *total, int n, NumberSeq **parts) { + for (int i = 0; i < n; ++i) { + if (parts[i] != NULL && total->num() != parts[i]->num()) + return false; + } + return true; +} + +NumberSeq::NumberSeq(NumberSeq *total, int n, NumberSeq **parts) { + guarantee(check_nums(total, n, parts), "all seq lengths should match"); + double sum = total->sum(); + for (int i = 0; i < n; ++i) { + if (parts[i] != NULL) + sum -= parts[i]->sum(); + } + + _num = total->num(); + _sum = sum; + + // we do not calculate these... + _sum_of_squares = -1.0; + _maximum = -1.0; + _davg = -1.0; + _dvariance = -1.0; +} + +void NumberSeq::add(double val) { + AbsSeq::add(val); + + _last = val; + if (_num == 0) { + _maximum = val; + } else { + if (val > _maximum) + _maximum = val; + } + _sum += val; + _sum_of_squares += val * val; + ++_num; +} + + +TruncatedSeq::TruncatedSeq(int length, double alpha): + AbsSeq(alpha), _length(length), _next(0) { + _sequence = NEW_C_HEAP_ARRAY(double, _length); + for (int i = 0; i < _length; ++i) + _sequence[i] = 0.0; +} + +void TruncatedSeq::add(double val) { + AbsSeq::add(val); + + // get the oldest value in the sequence... 
+ double old_val = _sequence[_next]; + // ...remove it from the sum and sum of squares + _sum -= old_val; + _sum_of_squares -= old_val * old_val; + + // ...and update them with the new value + _sum += val; + _sum_of_squares += val * val; + + // now replace the old value with the new one + _sequence[_next] = val; + _next = (_next + 1) % _length; + + // only increase it if the buffer is not full + if (_num < _length) + ++_num; + + guarantee( variance() > -1.0, "variance should be >= 0" ); +} + +// can't easily keep track of this incrementally... +double TruncatedSeq::maximum() const { + if (_num == 0) + return 0.0; + double ret = _sequence[0]; + for (int i = 1; i < _num; ++i) { + double val = _sequence[i]; + if (val > ret) + ret = val; + } + return ret; +} + +double TruncatedSeq::last() const { + if (_num == 0) + return 0.0; + unsigned last_index = (_next + _length - 1) % _length; + return _sequence[last_index]; +} + +double TruncatedSeq::oldest() const { + if (_num == 0) + return 0.0; + else if (_num < _length) + // index 0 always oldest value until the array is full + return _sequence[0]; + else { + // since the array is full, _next is over the oldest value + return _sequence[_next]; + } +} + +double TruncatedSeq::predict_next() const { + if (_num == 0) + return 0.0; + + double num = (double) _num; + double x_squared_sum = 0.0; + double x_sum = 0.0; + double y_sum = 0.0; + double xy_sum = 0.0; + double x_avg = 0.0; + double y_avg = 0.0; + + int first = (_next + _length - _num) % _length; + for (int i = 0; i < _num; ++i) { + double x = (double) i; + double y = _sequence[(first + i) % _length]; + + x_squared_sum += x * x; + x_sum += x; + y_sum += y; + xy_sum += x * y; + } + x_avg = x_sum / num; + y_avg = y_sum / num; + + double Sxx = x_squared_sum - x_sum * x_sum / num; + double Sxy = xy_sum - x_sum * y_sum / num; + double b1 = Sxy / Sxx; + double b0 = y_avg - b1 * x_avg; + + return b0 + b1 * num; +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/numberSeq.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/utilities/numberSeq.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,117 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +/** + ** This file contains a few classes that represent number sequence, + ** x1, x2, x3, ..., xN, and can calculate their avg, max, and sd. 
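A minimal usage sketch (the caller is hypothetical, not part of this changeset): feeding the latest pause time into a TruncatedSeq and using the linear-regression predictor implemented above to estimate the next one.

static double record_and_predict(TruncatedSeq* pauses, double latest_ms) {
  pauses->add(latest_ms);          // also refreshes the sums and decaying stats
  return pauses->predict_next();   // least-squares fit over the last L samples
}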
+ ** + ** Here's a quick description of the classes: + ** + ** AbsSeq: abstract superclass + ** NumberSeq: the sequence is assumed to be very long and the + ** maximum, avg, sd, davg, and dsd are calculated over all its elements + ** TruncatedSeq: this class keeps track of the last L elements + ** of the sequence and calculates avg, max, and sd only over them + **/ + +#define DEFAULT_ALPHA_VALUE 0.7 + +class AbsSeq { +private: + void init(double alpha); + +protected: + int _num; // the number of elements in the sequence + double _sum; // the sum of the elements in the sequence + double _sum_of_squares; // the sum of squares of the elements in the sequence + + double _davg; // decaying average + double _dvariance; // decaying variance + double _alpha; // factor for the decaying average / variance + + // This is what we divide with to get the average. In a standard + // number sequence, this should just be the number of elements in it. + virtual double total() const { return (double) _num; }; + +public: + AbsSeq(double alpha = DEFAULT_ALPHA_VALUE); + + virtual void add(double val); // adds a new element to the sequence + void add(unsigned val) { add((double) val); } + virtual double maximum() const = 0; // maximum element in the sequence + virtual double last() const = 0; // last element added in the sequence + + // the number of elements in the sequence + int num() const { return _num; } + // the sum of the elements in the sequence + double sum() const { return _sum; } + + double avg() const; // the average of the sequence + double variance() const; // the variance of the sequence + double sd() const; // the standard deviation of the sequence + + double davg() const; // decaying average + double dvariance() const; // decaying variance + double dsd() const; // decaying "standard deviation" +}; + +class NumberSeq: public AbsSeq { +private: + bool check_nums(NumberSeq* total, int n, NumberSeq** parts); + +protected: + double _last; + double _maximum; // keep track of maximum value + +public: + NumberSeq(double alpha = DEFAULT_ALPHA_VALUE); + NumberSeq(NumberSeq* total, int n_parts, NumberSeq** parts); + + virtual void add(double val); + virtual double maximum() const { return _maximum; } + virtual double last() const { return _last; } +}; + +class TruncatedSeq: public AbsSeq { +private: + enum PrivateConstants { + DefaultSeqLength = 10 + }; + void init(); +protected: + double *_sequence; // buffers the last L elements in the sequence + int _length; // this is L + int _next; // oldest slot in the array, i.e. 
next to be overwritten + +public: + // accepts a value for L + TruncatedSeq(int length = DefaultSeqLength, + double alpha = DEFAULT_ALPHA_VALUE); + virtual void add(double val); + virtual double maximum() const; + virtual double last() const; // the last value added to the sequence + + double oldest() const; // the oldest valid value in the sequence + double predict_next() const; // prediction based on linear regression +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/ostream.cpp --- a/src/share/vm/utilities/ostream.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/ostream.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -188,6 +188,17 @@ print_raw(buf); } +void outputStream::stamp(bool guard, + const char* prefix, + const char* suffix) { + if (!guard) { + return; + } + print_raw(prefix); + stamp(); + print_raw(suffix); +} + void outputStream::date_stamp(bool guard, const char* prefix, const char* suffix) { diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/ostream.hpp --- a/src/share/vm/utilities/ostream.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/ostream.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -86,6 +86,10 @@ // Time stamp TimeStamp& time_stamp() { return _stamp; } void stamp(); + void stamp(bool guard, const char* prefix, const char* suffix); + void stamp(bool guard) { + stamp(guard, "", ": "); + } // Date stamp void date_stamp(bool guard, const char* prefix, const char* suffix); // A simplified call that includes a suffix of ": " diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/taskqueue.cpp --- a/src/share/vm/utilities/taskqueue.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/taskqueue.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -65,7 +65,8 @@ os::sleep(Thread::current(), millis, false); } -bool ParallelTaskTerminator::offer_termination() { +bool +ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) { Atomic::inc(&_offered_termination); juint yield_count = 0; @@ -91,7 +92,8 @@ sleep(WorkStealingSleepMillis); } - if (peek_in_queue_set()) { + if (peek_in_queue_set() || + (terminator != NULL && terminator->should_exit_termination())) { Atomic::dec(&_offered_termination); return false; } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/taskqueue.hpp --- a/src/share/vm/utilities/taskqueue.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/taskqueue.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -120,6 +120,11 @@ return dirty_size(_bottom, get_top()); } + void set_empty() { + _bottom = 0; + _age = Age(); + } + // Maximum number of elements allowed in the queue. This is two less // than the actual queue size, for somewhat complicated reasons. juint max_elems() { return n() - 2; } @@ -155,6 +160,9 @@ // Delete any resource associated with the queue. ~GenericTaskQueue(); + // apply the closure to all elements in the task queue + void oops_do(OopClosure* f); + private: // Element array. 
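A minimal sketch of the new guarded stamp() (the logging helper is assumed, not part of this changeset): the one-argument form prints the time stamp followed by ": " only when the guard is true, so callers do not need their own if-check.

static void log_phase(outputStream* st, bool print_time_stamps,
                      const char* phase, double ms) {
  st->stamp(print_time_stamps);            // emits "<seconds>: " if requested
  st->print_cr("%s took %1.3lf ms", phase, ms);
}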
volatile E* _elems; @@ -172,6 +180,24 @@ } template<class E> +void GenericTaskQueue<E>::oops_do(OopClosure* f) { + // tty->print_cr("START OopTaskQueue::oops_do"); + int iters = size(); + juint index = _bottom; + for (int i = 0; i < iters; ++i) { + index = decrement_index(index); + // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T, + // index, &_elems[index], _elems[index]); + E* t = (E*)&_elems[index]; // cast away volatility + oop* p = (oop*)t; + assert((*t)->is_oop_or_null(), "Not an oop or null"); + f->do_oop(p); + } + // tty->print_cr("END OopTaskQueue::oops_do"); +} + + +template<class E> +bool GenericTaskQueue<E>::push_slow(E t, juint dirty_n_elems) { if (dirty_n_elems == n() - 1) { // Actually means 0, so do the push. @@ -383,6 +409,12 @@ return false; } +// When to terminate from the termination protocol. +class TerminatorTerminator: public CHeapObj { +public: + virtual bool should_exit_termination() = 0; +}; + // A class to aid in the termination of a set of parallel tasks using // TaskQueueSet's for work stealing. @@ -407,7 +439,14 @@ // else is. If returns "true", all threads are terminated. If returns // "false", available work has been observed in one of the task queues, // so the global task is not complete. - bool offer_termination(); + bool offer_termination() { + return offer_termination(NULL); + } + + // As above, but it also terminates if the should_exit_termination() + // method of the terminator parameter returns true. If terminator is + // NULL, then it is ignored. + bool offer_termination(TerminatorTerminator* terminator); // Reset the terminator, so that it may be reused again. // The caller is responsible for ensuring that this is done diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/workgroup.cpp --- a/src/share/vm/utilities/workgroup.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/workgroup.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -28,13 +28,19 @@ // Definitions of WorkGang methods. AbstractWorkGang::AbstractWorkGang(const char* name, - bool are_GC_threads) : + bool are_GC_task_threads, + bool are_ConcurrentGC_threads) : _name(name), - _are_GC_threads(are_GC_threads) { + _are_GC_task_threads(are_GC_task_threads), + _are_ConcurrentGC_threads(are_ConcurrentGC_threads) { + + assert(!(are_GC_task_threads && are_ConcurrentGC_threads), + "They cannot both be STW GC and Concurrent threads" ); + + // Other initialization. _monitor = new Monitor(/* priority */ Mutex::leaf, /* name */ "WorkGroup monitor", - /* allow_vm_block */ are_GC_threads); + /* allow_vm_block */ are_GC_task_threads); assert(monitor() != NULL, "Failed to allocate monitor"); _terminate = false; _task = NULL; @@ -44,16 +50,21 @@ } WorkGang::WorkGang(const char* name, - int workers, - bool are_GC_threads) : - AbstractWorkGang(name, are_GC_threads) { + int workers, + bool are_GC_task_threads, + bool are_ConcurrentGC_threads) : + AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads) +{ // Save arguments.
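A hypothetical TerminatorTerminator (not part of this changeset) showing how a caller can make the termination protocol give up when an external abort flag is raised.

class AbortFlagTerminator : public TerminatorTerminator {
  volatile bool* _abort_requested;   // assumed external flag
public:
  AbortFlagTerminator(volatile bool* flag) : _abort_requested(flag) { }
  virtual bool should_exit_termination() { return *_abort_requested; }
};

A worker would then call offer_termination(&aft) instead of the no-argument form; note that a false return no longer guarantees that work was observed, since the terminator itself may have requested the exit.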
_total_workers = workers; + if (TraceWorkGang) { tty->print_cr("Constructing work gang %s with %d threads", name, workers); } _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, workers); - assert(gang_workers() != NULL, "Failed to allocate gang workers"); + if (gang_workers() == NULL) { + vm_exit_out_of_memory(0, "Cannot create GangWorker array."); + } for (int worker = 0; worker < total_workers(); worker += 1) { GangWorker* new_worker = new GangWorker(this, worker); assert(new_worker != NULL, "Failed to allocate GangWorker"); @@ -285,7 +296,11 @@ } bool GangWorker::is_GC_task_thread() const { - return gang()->are_GC_threads(); + return gang()->are_GC_task_threads(); +} + +bool GangWorker::is_ConcurrentGC_thread() const { + return gang()->are_ConcurrentGC_threads(); } void GangWorker::print_on(outputStream* st) const { @@ -312,26 +327,43 @@ WorkGangBarrierSync::WorkGangBarrierSync() : _monitor(Mutex::safepoint, "work gang barrier sync", true), - _n_workers(0), _n_completed(0) { + _n_workers(0), _n_completed(0), _should_reset(false) { } WorkGangBarrierSync::WorkGangBarrierSync(int n_workers, const char* name) : _monitor(Mutex::safepoint, name, true), - _n_workers(n_workers), _n_completed(0) { + _n_workers(n_workers), _n_completed(0), _should_reset(false) { } void WorkGangBarrierSync::set_n_workers(int n_workers) { _n_workers = n_workers; _n_completed = 0; + _should_reset = false; } void WorkGangBarrierSync::enter() { MutexLockerEx x(monitor(), Mutex::_no_safepoint_check_flag); + if (should_reset()) { + // The should_reset() was set and we are the first worker to enter + // the sync barrier. We will zero the n_completed() count which + // effectively resets the barrier. + zero_completed(); + set_should_reset(false); + } inc_completed(); if (n_completed() == n_workers()) { + // At this point we would like to reset the barrier to be ready in + // case it is used again. However, we cannot set n_completed() to + // 0, even after the notify_all(), given that some other workers + // might still be waiting for n_completed() to become == + // n_workers(). So, if we set n_completed() to 0, those workers + // will get stuck (as they will wake up, see that n_completed() != + // n_workers() and go back to sleep). Instead, we raise the + // should_reset() flag and the barrier will be reset the first + // time a worker enters it again. + set_should_reset(true); monitor()->notify_all(); - } - else { + } else { while (n_completed() != n_workers()) { monitor()->wait(/* no_safepoint_check */ true); } @@ -442,3 +474,122 @@ } return false; } + +bool FreeIdSet::_stat_init = false; +FreeIdSet* FreeIdSet::_sets[NSets]; +bool FreeIdSet::_safepoint; + +FreeIdSet::FreeIdSet(int sz, Monitor* mon) : + _sz(sz), _mon(mon), _hd(0), _waiters(0), _index(-1), _claimed(0) +{ + _ids = new int[sz]; + for (int i = 0; i < sz; i++) _ids[i] = i+1; + _ids[sz-1] = end_of_list; // end of list. + if (_stat_init) { + for (int j = 0; j < NSets; j++) _sets[j] = NULL; + _stat_init = true; + } + // Add to sets. (This should happen while the system is still single-threaded.) 
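A minimal usage sketch of the barrier (the per-phase functions are assumed, not part of this changeset): every one of the gang's n_workers() threads calls enter(), which blocks until the last one arrives, and the deferred reset described above lets the same barrier be reused at the next phase boundary.

extern void do_phase_one(int worker_id);   // assumed per-worker work
extern void do_phase_two(int worker_id);

static void run_two_phases(WorkGangBarrierSync* bs, int worker_id) {
  do_phase_one(worker_id);
  bs->enter();              // blocks until all n_workers() have entered
  do_phase_two(worker_id);  // phase one is now globally complete
}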
+ for (int j = 0; j < NSets; j++) { + if (_sets[j] == NULL) { + _sets[j] = this; + _index = j; + break; + } + } + guarantee(_index != -1, "Too many FreeIdSets in use!"); +} + +FreeIdSet::~FreeIdSet() { + _sets[_index] = NULL; +} + +void FreeIdSet::set_safepoint(bool b) { + _safepoint = b; + if (b) { + for (int j = 0; j < NSets; j++) { + if (_sets[j] != NULL && _sets[j]->_waiters > 0) { + Monitor* mon = _sets[j]->_mon; + mon->lock_without_safepoint_check(); + mon->notify_all(); + mon->unlock(); + } + } + } +} + +#define FID_STATS 0 + +int FreeIdSet::claim_par_id() { +#if FID_STATS + thread_t tslf = thr_self(); + tty->print("claim_par_id[%d]: sz = %d, claimed = %d\n", tslf, _sz, _claimed); +#endif + MutexLockerEx x(_mon, Mutex::_no_safepoint_check_flag); + while (!_safepoint && _hd == end_of_list) { + _waiters++; +#if FID_STATS + if (_waiters > 5) { + tty->print("claim_par_id waiting[%d]: %d waiters, %d claimed.\n", + tslf, _waiters, _claimed); + } +#endif + _mon->wait(Mutex::_no_safepoint_check_flag); + _waiters--; + } + if (_hd == end_of_list) { +#if FID_STATS + tty->print("claim_par_id[%d]: returning EOL.\n", tslf); +#endif + return -1; + } else { + int res = _hd; + _hd = _ids[res]; + _ids[res] = claimed; // For debugging. + _claimed++; +#if FID_STATS + tty->print("claim_par_id[%d]: returning %d, claimed = %d.\n", + tslf, res, _claimed); +#endif + return res; + } +} + +bool FreeIdSet::claim_perm_id(int i) { + assert(0 <= i && i < _sz, "Out of range."); + MutexLockerEx x(_mon, Mutex::_no_safepoint_check_flag); + int prev = end_of_list; + int cur = _hd; + while (cur != end_of_list) { + if (cur == i) { + if (prev == end_of_list) { + _hd = _ids[cur]; + } else { + _ids[prev] = _ids[cur]; + } + _ids[cur] = claimed; + _claimed++; + return true; + } else { + prev = cur; + cur = _ids[cur]; + } + } + return false; + +} + +void FreeIdSet::release_par_id(int id) { + MutexLockerEx x(_mon, Mutex::_no_safepoint_check_flag); + assert(_ids[id] == claimed, "Precondition."); + _ids[id] = _hd; + _hd = id; + _claimed--; +#if FID_STATS + tty->print("[%d] release_par_id(%d), waiters =%d, claimed = %d.\n", + thr_self(), id, _waiters, _claimed); +#endif + if (_waiters > 0) + // Notify all would be safer, but this is OK, right? + _mon->notify_all(); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/workgroup.hpp --- a/src/share/vm/utilities/workgroup.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/workgroup.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -72,7 +72,8 @@ // Here's the public interface to this class. public: // Constructor and destructor. - AbstractWorkGang(const char* name, bool are_GC_threads); + AbstractWorkGang(const char* name, bool are_GC_task_threads, + bool are_ConcurrentGC_threads); ~AbstractWorkGang(); // Run a task, returns when the task is done (or terminated). virtual void run_task(AbstractGangTask* task) = 0; @@ -83,7 +84,8 @@ const char* name() const; protected: // Initialize only instance data. - const bool _are_GC_threads; + const bool _are_GC_task_threads; + const bool _are_ConcurrentGC_threads; // Printing support. const char* _name; // The monitor which protects these data, @@ -130,8 +132,11 @@ int finished_workers() const { return _finished_workers; } - bool are_GC_threads() const { - return _are_GC_threads; + bool are_GC_task_threads() const { + return _are_GC_task_threads; + } + bool are_ConcurrentGC_threads() const { + return _are_ConcurrentGC_threads; } // Predicates. 
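A hypothetical borrow-and-return pattern for FreeIdSet (the worker function is assumed, not part of this changeset): claim_par_id() may block until an id is released and returns -1 if a safepoint wakes the waiter up.

extern void do_work_with_id(int id);   // assumed

static bool with_par_id(FreeIdSet* ids) {
  int id = ids->claim_par_id();   // may wait; -1 means woken up by a safepoint
  if (id == -1) return false;
  do_work_with_id(id);
  ids->release_par_id(id);
  return true;
}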
bool is_idle() const { @@ -190,7 +195,8 @@ class WorkGang: public AbstractWorkGang { public: // Constructor - WorkGang(const char* name, int workers, bool are_GC_threads); + WorkGang(const char* name, int workers, + bool are_GC_task_threads, bool are_ConcurrentGC_threads); // Run a task, returns when the task is done (or terminated). virtual void run_task(AbstractGangTask* task); }; @@ -206,6 +212,7 @@ virtual void run(); // Predicate for Thread virtual bool is_GC_task_thread() const; + virtual bool is_ConcurrentGC_thread() const; // Printing void print_on(outputStream* st) const; virtual void print() const { print_on(tty); } @@ -228,12 +235,17 @@ Monitor _monitor; int _n_workers; int _n_completed; + bool _should_reset; - Monitor* monitor() { return &_monitor; } - int n_workers() { return _n_workers; } - int n_completed() { return _n_completed; } + Monitor* monitor() { return &_monitor; } + int n_workers() { return _n_workers; } + int n_completed() { return _n_completed; } + bool should_reset() { return _should_reset; } - void inc_completed() { _n_completed++; } + void zero_completed() { _n_completed = 0; } + void inc_completed() { _n_completed++; } + + void set_should_reset(bool v) { _should_reset = v; } public: WorkGangBarrierSync(); @@ -343,3 +355,42 @@ // cleanup if necessary. bool all_tasks_completed(); }; + +// Represents a set of free small integer ids. +class FreeIdSet { + enum { + end_of_list = -1, + claimed = -2 + }; + + int _sz; + Monitor* _mon; + + int* _ids; + int _hd; + int _waiters; + int _claimed; + + static bool _safepoint; + typedef FreeIdSet* FreeIdSetPtr; + static const int NSets = 10; + static FreeIdSetPtr _sets[NSets]; + static bool _stat_init; + int _index; + +public: + FreeIdSet(int sz, Monitor* mon); + ~FreeIdSet(); + + static void set_safepoint(bool b); + + // Attempt to claim the given id permanently. Returns "true" iff + // successful. + bool claim_perm_id(int i); + + // Returns an unclaimed parallel id (waiting for one to be released if + // necessary). Returns "-1" if a GC wakes up a wait for an id. + int claim_par_id(); + + void release_par_id(int id); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/yieldingWorkgroup.cpp --- a/src/share/vm/utilities/yieldingWorkgroup.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/yieldingWorkgroup.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -31,8 +31,8 @@ class WorkData; YieldingFlexibleWorkGang::YieldingFlexibleWorkGang( - const char* name, int workers, bool are_GC_threads) : - AbstractWorkGang(name, are_GC_threads) { + const char* name, int workers, bool are_GC_task_threads) : + AbstractWorkGang(name, are_GC_task_threads, false) { // Save arguments. _total_workers = workers; assert(_total_workers > 0, "Must have more than 1 worker"); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/yieldingWorkgroup.hpp --- a/src/share/vm/utilities/yieldingWorkgroup.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/yieldingWorkgroup.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -143,7 +143,8 @@ // Here's the public interface to this class. public: // Constructor and destructor. - YieldingFlexibleWorkGang(const char* name, int workers, bool are_GC_threads); + YieldingFlexibleWorkGang(const char* name, int workers, + bool are_GC_task_threads); YieldingFlexibleGangTask* yielding_task() const { assert(task() == NULL || task()->is_YieldingFlexibleGang_task(),