# HG changeset patch # User iveresov # Date 1315597477 25200 # Node ID 5257f8e66b40e47f841b9308d2c44d1e2c7d2eb6 # Parent e984655be42595224d940d7983b3fd18233e49a1# Parent 8f47d8870d9a8331dcb7726c75fae984602e046c Merge diff -r 8f47d8870d9a -r 5257f8e66b40 .hgtags --- a/.hgtags Thu Sep 08 09:35:41 2011 +0200 +++ b/.hgtags Fri Sep 09 12:44:37 2011 -0700 @@ -174,3 +174,10 @@ 9ad1548c6b63d596c411afc35147ffd5254426d9 hs21-b12 c149193c768b8b7233da4c3a3fdc0756b975848e hs21-b13 c149193c768b8b7233da4c3a3fdc0756b975848e jdk7-b143 +0cc8a70952c368e06de2adab1f2649a408f5e577 jdk8-b01 +31e253c1da429124bb87570ab095d9bc89850d0a jdk8-b02 +3a2fb61165dfc72e398179a2796d740c8da5b8c0 jdk8-b03 +0cc8a70952c368e06de2adab1f2649a408f5e577 hs22-b01 +7c29742c41b44fb0cd5a13c7ac8834f3f2ca649e hs22-b02 +3a2fb61165dfc72e398179a2796d740c8da5b8c0 hs22-b03 +ce9bde819dcba4a5d2822229d9183e69c74326ca hs22-b04 diff -r 8f47d8870d9a -r 5257f8e66b40 make/hotspot_version --- a/make/hotspot_version Thu Sep 08 09:35:41 2011 +0200 +++ b/make/hotspot_version Fri Sep 09 12:44:37 2011 -0700 @@ -35,7 +35,7 @@ HS_MAJOR_VER=22 HS_MINOR_VER=0 -HS_BUILD_NUMBER=02 +HS_BUILD_NUMBER=04 JDK_MAJOR_VER=1 JDK_MINOR_VER=8 diff -r 8f47d8870d9a -r 5257f8e66b40 src/cpu/sparc/vm/assembler_sparc.cpp --- a/src/cpu/sparc/vm/assembler_sparc.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/cpu/sparc/vm/assembler_sparc.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -2165,29 +2165,6 @@ #endif } -void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, - Register s1, address d, - relocInfo::relocType rt ) { - assert_not_delayed(); - if (VM_Version::v9_instructions_work()) { - bpr(rc, a, p, s1, d, rt); - } else { - tst(s1); - br(reg_cond_to_cc_cond(rc), a, p, d, rt); - } -} - -void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, - Register s1, Label& L ) { - assert_not_delayed(); - if (VM_Version::v9_instructions_work()) { - bpr(rc, a, p, s1, L); - } else { - tst(s1); - br(reg_cond_to_cc_cond(rc), a, p, L); - } -} - // 
Compare registers and branch with nop in delay slot or cbcond without delay slot. // Compare integer (32 bit) values (icc only). @@ -4344,22 +4321,29 @@ } else { pre_val = O0; } + int satb_q_index_byte_offset = in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_index()); + int satb_q_buf_byte_offset = in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_buf()); + assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) && in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t), "check sizes in assembly below"); __ bind(restart); + + // Load the index into the SATB buffer. PtrQueue::_index is a size_t + // so ld_ptr is appropriate. __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0); - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill); - // If the branch is taken, no harm in executing this in the delay slot. - __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); + // index == 0? + __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill); + + __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); __ sub(L0, oopSize, L0); __ st_ptr(pre_val, L1, L0); // [_buf + index] := I0 @@ -4470,9 +4454,8 @@ tmp); } - // Check on whether to annul. - br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); - delayed()->nop(); + // Is marking active? + cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered); // Do we need to load the previous value? if (obj != noreg) { @@ -4494,9 +4477,7 @@ assert(pre_val != noreg, "must have a real register"); // Is the previous value null? - // Check on whether to annul. - br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered); - delayed()->nop(); + cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered); // OK, it's not filtered, so we'll need to call enqueue. 
In the normal // case, pre_val will be a scratch G-reg, but there are some cases in @@ -4523,39 +4504,6 @@ bind(filtered); } -static jint num_ct_writes = 0; -static jint num_ct_writes_filtered_in_hr = 0; -static jint num_ct_writes_filtered_null = 0; -static G1CollectedHeap* g1 = NULL; - -static Thread* count_ct_writes(void* filter_val, void* new_val) { - Atomic::inc(&num_ct_writes); - if (filter_val == NULL) { - Atomic::inc(&num_ct_writes_filtered_in_hr); - } else if (new_val == NULL) { - Atomic::inc(&num_ct_writes_filtered_null); - } else { - if (g1 == NULL) { - g1 = G1CollectedHeap::heap(); - } - } - if ((num_ct_writes % 1000000) == 0) { - jint num_ct_writes_filtered = - num_ct_writes_filtered_in_hr + - num_ct_writes_filtered_null; - - tty->print_cr("%d potential CT writes: %5.2f%% filtered\n" - " (%5.2f%% intra-HR, %5.2f%% null).", - num_ct_writes, - 100.0*(float)num_ct_writes_filtered/(float)num_ct_writes, - 100.0*(float)num_ct_writes_filtered_in_hr/ - (float)num_ct_writes, - 100.0*(float)num_ct_writes_filtered_null/ - (float)num_ct_writes); - } - return Thread::current(); -} - static address dirty_card_log_enqueue = 0; static u_char* dirty_card_log_enqueue_end = 0; @@ -4578,11 +4526,8 @@ __ set(addrlit, O1); // O1 := __ ldub(O0, O1, O2); // O2 := [O0 + O1] - __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, - O2, not_already_dirty); - // Get O1 + O2 into a reg by itself -- useful in the take-the-branch - // case, harmless if not. - __ delayed()->add(O0, O1, O3); + assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code"); + __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty); // We didn't take the branch, so we're already dirty: return. // Use return-from-leaf @@ -4591,8 +4536,13 @@ // Not dirty. __ bind(not_already_dirty); + + // Get O0 + O1 into a reg by itself + __ add(O0, O1, O3); + // First, dirty it. __ stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). 
+ int dirty_card_q_index_byte_offset = in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_index()); @@ -4600,12 +4550,15 @@ in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_buf()); __ bind(restart); + + // Load the index into the update buffer. PtrQueue::_index is + // a size_t so ld_ptr is appropriate here. __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, - L0, refill); - // If the branch is taken, no harm in executing this in the delay slot. - __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); + // index == 0? + __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill); + + __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); __ sub(L0, oopSize, L0); __ st_ptr(O3, L1, L0); // [_buf + index] := I0 @@ -4668,6 +4621,7 @@ G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set(); assert(bs->kind() == BarrierSet::G1SATBCT || bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier"); + if (G1RSBarrierRegionFilter) { xor3(store_addr, new_val, tmp); #ifdef _LP64 @@ -4676,33 +4630,8 @@ srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp); #endif - if (G1PrintCTFilterStats) { - guarantee(tmp->is_global(), "Or stats won't work..."); - // This is a sleazy hack: I'm temporarily hijacking G2, which I - // promise to restore. - mov(new_val, G2); - save_frame(0); - mov(tmp, O0); - mov(G2, O1); - // Save G-regs that target may use. - mov(G1, L1); - mov(G2, L2); - mov(G3, L3); - mov(G4, L4); - mov(G5, L5); - call(CAST_FROM_FN_PTR(address, &count_ct_writes)); - delayed()->nop(); - mov(O0, G2); - // Restore G-regs that target may have used. - mov(L1, G1); - mov(L3, G3); - mov(L4, G4); - mov(L5, G5); - restore(G0, G0, G0); - } - // XXX Should I predict this taken or not? Does it mattern? 
- br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); - delayed()->nop(); + // XXX Should I predict this taken or not? Does it matter? + cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered); } // If the "store_addr" register is an "in" or "local" register, move it to @@ -4727,7 +4656,6 @@ restore(); bind(filtered); - } #endif // SERIALGC diff -r 8f47d8870d9a -r 5257f8e66b40 src/cpu/sparc/vm/assembler_sparc.hpp --- a/src/cpu/sparc/vm/assembler_sparc.hpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp Fri Sep 09 12:44:37 2011 -0700 @@ -1944,12 +1944,6 @@ void br_null ( Register s1, bool a, Predict p, Label& L ); void br_notnull( Register s1, bool a, Predict p, Label& L ); - // These versions will do the most efficient thing on v8 and v9. Perhaps - // this is what the routine above was meant to do, but it didn't (and - // didn't cover both target address kinds.) - void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none ); - void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, Label& L); - // // Compare registers and branch with nop in delay slot or cbcond without delay slot. // diff -r 8f47d8870d9a -r 5257f8e66b40 src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp --- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -421,8 +421,7 @@ } if (__ is_in_wdisp16_range(_continuation)) { - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, - pre_val_reg, _continuation); + __ br_null(pre_val_reg, /*annul*/false, Assembler::pt, _continuation); } else { __ cmp(pre_val_reg, G0); __ brx(Assembler::equal, false, Assembler::pn, _continuation); @@ -458,8 +457,7 @@ // The original src operand was not a constant. // Generate src == null? 
if (__ is_in_wdisp16_range(_continuation)) { - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, - src_reg, _continuation); + __ br_null(src_reg, /*annul*/false, Assembler::pt, _continuation); } else { __ cmp(src_reg, G0); __ brx(Assembler::equal, false, Assembler::pt, _continuation); @@ -476,13 +474,9 @@ Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc)); __ ld(ref_type_adr, tmp_reg); - if (__ is_in_wdisp16_range(_continuation)) { - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, - tmp_reg, _continuation); - } else { - __ cmp(tmp_reg, G0); - __ brx(Assembler::equal, false, Assembler::pt, _continuation); - } + // _reference_type field is of type ReferenceType (enum) + assert(REF_NONE == 0, "check this code"); + __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt); __ delayed()->nop(); // Is marking active? @@ -498,13 +492,8 @@ assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); __ ldsb(in_progress, tmp_reg); } - if (__ is_in_wdisp16_range(_continuation)) { - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, - tmp_reg, _continuation); - } else { - __ cmp(tmp_reg, G0); - __ brx(Assembler::equal, false, Assembler::pt, _continuation); - } + + __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt); __ delayed()->nop(); // val == null? 
@@ -512,8 +501,7 @@ Register val_reg = val()->as_register(); if (__ is_in_wdisp16_range(_continuation)) { - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, - val_reg, _continuation); + __ br_null(val_reg, /*annul*/false, Assembler::pt, _continuation); } else { __ cmp(val_reg, G0); __ brx(Assembler::equal, false, Assembler::pt, _continuation); @@ -542,9 +530,9 @@ assert(new_val()->is_register(), "Precondition."); Register addr_reg = addr()->as_pointer_register(); Register new_val_reg = new_val()->as_register(); + if (__ is_in_wdisp16_range(_continuation)) { - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, - new_val_reg, _continuation); + __ br_null(new_val_reg, /*annul*/false, Assembler::pt, _continuation); } else { __ cmp(new_val_reg, G0); __ brx(Assembler::equal, false, Assembler::pn, _continuation); diff -r 8f47d8870d9a -r 5257f8e66b40 src/cpu/sparc/vm/c1_Runtime1_sparc.cpp --- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -827,14 +827,16 @@ int satb_q_buf_byte_offset = in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_buf()); + __ bind(restart); + // Load the index into the SATB buffer. PtrQueue::_index is a + // size_t so ld_ptr is appropriate __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp); - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, - Assembler::pn, tmp, refill); + // index == 0? + __ cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pn, refill); - // If the branch is taken, no harm in executing this in the delay slot. 
- __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2); + __ ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2); __ sub(tmp, oopSize, tmp); __ st_ptr(pre_val, tmp2, tmp); // [_buf + index] := @@ -894,11 +896,8 @@ __ set(rs, cardtable); // cardtable := __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable] - __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, - tmp, not_already_dirty); - // Get cardtable + tmp into a reg by itself -- useful in the take-the-branch - // case, harmless if not. - __ delayed()->add(addr, cardtable, tmp2); + assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code"); + __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, not_already_dirty); // We didn't take the branch, so we're already dirty: return. // Use return-from-leaf @@ -907,6 +906,10 @@ // Not dirty. __ bind(not_already_dirty); + + // Get addr + cardtable into a reg by itself + __ add(addr, cardtable, tmp2); + // First, dirty it. __ stb(G0, tmp2, 0); // [cardPtr] := 0 (i.e., dirty). @@ -922,13 +925,17 @@ int dirty_card_q_buf_byte_offset = in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_buf()); + __ bind(restart); + + // Get the index into the update buffer. PtrQueue::_index is + // a size_t so ld_ptr is appropriate here. __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3); - __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, - tmp3, refill); - // If the branch is taken, no harm in executing this in the delay slot. - __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4); + // index == 0? 
+ __ cmp_and_brx_short(tmp3, G0, Assembler::equal, Assembler::pn, refill); + + __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4); __ sub(tmp3, oopSize, tmp3); __ st_ptr(tmp2, tmp4, tmp3); // [_buf + index] := diff -r 8f47d8870d9a -r 5257f8e66b40 src/os/linux/vm/os_linux.cpp --- a/src/os/linux/vm/os_linux.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/os/linux/vm/os_linux.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -125,10 +125,6 @@ # include # include -#ifdef AMD64 -#include -#endif - #define MAX_PATH (2 * K) // for timer info max values which include all bits @@ -2502,7 +2498,13 @@ int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; uintptr_t res = (uintptr_t) ::mmap(addr, size, prot, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0); - return res != (uintptr_t) MAP_FAILED; + if (res != (uintptr_t) MAP_FAILED) { + if (UseNUMAInterleaving) { + numa_make_global(addr, size); + } + return true; + } + return false; } // Define MAP_HUGETLB here so we can build HotSpot on old systems. @@ -2523,10 +2525,20 @@ (uintptr_t) ::mmap(addr, size, prot, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0); - return res != (uintptr_t) MAP_FAILED; - } - - return commit_memory(addr, size, exec); + if (res != (uintptr_t) MAP_FAILED) { + if (UseNUMAInterleaving) { + numa_make_global(addr, size); + } + return true; + } + // Fall through and try to use small pages + } + + if (commit_memory(addr, size, exec)) { + realign_memory(addr, size, alignment_hint); + return true; + } + return false; } void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { @@ -2588,8 +2600,17 @@ int retval = -1; #if defined(IA32) +# ifndef SYS_getcpu +# define SYS_getcpu 318 +# endif retval = syscall(SYS_getcpu, &cpu, NULL, NULL); #elif defined(AMD64) +// Unfortunately we have to bring all these macros here from vsyscall.h +// to be able to compile on old linuxes. 
+# define __NR_vgetcpu 2 +# define VSYSCALL_START (-10UL << 20) +# define VSYSCALL_SIZE 1024 +# define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr)) typedef long (*vgetcpu_t)(unsigned int *cpu, unsigned int *node, unsigned long *tcache); vgetcpu_t vgetcpu = (vgetcpu_t)VSYSCALL_ADDR(__NR_vgetcpu); retval = vgetcpu(&cpu, NULL, NULL); @@ -3115,6 +3136,10 @@ return NULL; } + if ((addr != NULL) && UseNUMAInterleaving) { + numa_make_global(addr, bytes); + } + return addr; } diff -r 8f47d8870d9a -r 5257f8e66b40 src/os/solaris/vm/os_solaris.cpp --- a/src/os/solaris/vm/os_solaris.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/os/solaris/vm/os_solaris.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -2777,8 +2777,14 @@ bool os::commit_memory(char* addr, size_t bytes, bool exec) { int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; size_t size = bytes; - return - NULL != Solaris::mmap_chunk(addr, size, MAP_PRIVATE|MAP_FIXED, prot); + char *res = Solaris::mmap_chunk(addr, size, MAP_PRIVATE|MAP_FIXED, prot); + if (res != NULL) { + if (UseNUMAInterleaving) { + numa_make_global(addr, bytes); + } + return true; + } + return false; } bool os::commit_memory(char* addr, size_t bytes, size_t alignment_hint, @@ -3389,12 +3395,11 @@ return true; } -char* os::reserve_memory_special(size_t bytes, char* addr, bool exec) { +char* os::reserve_memory_special(size_t size, char* addr, bool exec) { // "exec" is passed in but not used. Creating the shared image for // the code cache doesn't have an SHM_X executable permission to check. 
assert(UseLargePages && UseISM, "only for ISM large pages"); - size_t size = bytes; char* retAddr = NULL; int shmid; key_t ismKey; @@ -3436,7 +3441,9 @@ } return NULL; } - + if ((retAddr != NULL) && UseNUMAInterleaving) { + numa_make_global(retAddr, size); + } return retAddr; } diff -r 8f47d8870d9a -r 5257f8e66b40 src/os/windows/vm/os_windows.cpp --- a/src/os/windows/vm/os_windows.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/os/windows/vm/os_windows.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -2614,6 +2614,57 @@ static HANDLE _hProcess; static HANDLE _hToken; +// Container for NUMA node list info +class NUMANodeListHolder { +private: + int *_numa_used_node_list; // allocated below + int _numa_used_node_count; + + void free_node_list() { + if (_numa_used_node_list != NULL) { + FREE_C_HEAP_ARRAY(int, _numa_used_node_list); + } + } + +public: + NUMANodeListHolder() { + _numa_used_node_count = 0; + _numa_used_node_list = NULL; + // do rest of initialization in build routine (after function pointers are set up) + } + + ~NUMANodeListHolder() { + free_node_list(); + } + + bool build() { + DWORD_PTR proc_aff_mask; + DWORD_PTR sys_aff_mask; + if (!GetProcessAffinityMask(GetCurrentProcess(), &proc_aff_mask, &sys_aff_mask)) return false; + ULONG highest_node_number; + if (!os::Kernel32Dll::GetNumaHighestNodeNumber(&highest_node_number)) return false; + free_node_list(); + _numa_used_node_list = NEW_C_HEAP_ARRAY(int, highest_node_number + 1); // the loop below visits nodes 0..highest_node_number inclusive, so up to highest_node_number + 1 entries are stored + for (unsigned int i = 0; i <= highest_node_number; i++) { + ULONGLONG proc_mask_numa_node; + if (!os::Kernel32Dll::GetNumaNodeProcessorMask(i, &proc_mask_numa_node)) return false; + if ((proc_aff_mask & proc_mask_numa_node)!=0) { + _numa_used_node_list[_numa_used_node_count++] = i; + } + } + return (_numa_used_node_count > 1); + } + + int get_count() {return _numa_used_node_count;} + int get_node_list_entry(int n) { + // for indexes out of range, returns -1 + return (n < _numa_used_node_count ? 
_numa_used_node_list[n] : -1); + } + +} numa_node_list_holder; + + + static size_t _large_page_size = 0; static bool resolve_functions_for_large_page_init() { @@ -2653,6 +2704,153 @@ _hToken = NULL; } +static bool numa_interleaving_init() { + bool success = false; + bool use_numa_interleaving_specified = !FLAG_IS_DEFAULT(UseNUMAInterleaving); + + // print a warning if UseNUMAInterleaving flag is specified on command line + bool warn_on_failure = use_numa_interleaving_specified; +# define WARN(msg) if (warn_on_failure) { warning(msg); } + + // NUMAInterleaveGranularity cannot be less than vm_allocation_granularity (or _large_page_size if using large pages) + size_t min_interleave_granularity = UseLargePages ? _large_page_size : os::vm_allocation_granularity(); + NUMAInterleaveGranularity = align_size_up(NUMAInterleaveGranularity, min_interleave_granularity); + + if (os::Kernel32Dll::NumaCallsAvailable()) { + if (numa_node_list_holder.build()) { + if (PrintMiscellaneous && Verbose) { + tty->print("NUMA UsedNodeCount=%d, namely ", numa_node_list_holder.get_count()); + for (int i = 0; i < numa_node_list_holder.get_count(); i++) { + tty->print("%d ", numa_node_list_holder.get_node_list_entry(i)); + } + tty->print("\n"); + } + success = true; + } else { + WARN("Process does not cover multiple NUMA nodes."); + } + } else { + WARN("NUMA Interleaving is not supported by the operating system."); + } + if (!success) { + if (use_numa_interleaving_specified) WARN("...Ignoring UseNUMAInterleaving flag."); + } + return success; +#undef WARN +} + +// this routine is used whenever we need to reserve a contiguous VA range +// but we need to make separate VirtualAlloc calls for each piece of the range +// Reasons for doing this: +// * UseLargePagesIndividualAllocation was set (normally only needed on WS2003 but possible to be set otherwise) +// * UseNUMAInterleaving requires a separate node for each piece +static char* allocate_pages_individually(size_t bytes, char* addr, DWORD 
flags, DWORD prot, + bool should_inject_error=false) { + char * p_buf; + // note: at setup time we guaranteed that NUMAInterleaveGranularity was aligned up to a page size + size_t page_size = UseLargePages ? _large_page_size : os::vm_allocation_granularity(); + size_t chunk_size = UseNUMAInterleaving ? NUMAInterleaveGranularity : page_size; + + // first reserve enough address space in advance since we want to be + // able to break a single contiguous virtual address range into multiple + // large page commits but WS2003 does not allow reserving large page space + // so we just use 4K pages for reserve, this gives us a legal contiguous + // address space. then we will deallocate that reservation, and re alloc + // using large pages + const size_t size_of_reserve = bytes + chunk_size; + if (bytes > size_of_reserve) { + // Overflowed. + return NULL; + } + p_buf = (char *) VirtualAlloc(addr, + size_of_reserve, // size of Reserve + MEM_RESERVE, + PAGE_READWRITE); + // If reservation failed, return NULL + if (p_buf == NULL) return NULL; + + os::release_memory(p_buf, bytes + chunk_size); + + // we still need to round up to a page boundary (in case we are using large pages) + // but not to a chunk boundary (in case InterleavingGranularity doesn't align with page size) + // instead we handle this in the bytes_to_rq computation below + p_buf = (char *) align_size_up((size_t)p_buf, page_size); + + // now go through and allocate one chunk at a time until all bytes are + // allocated + size_t bytes_remaining = bytes; + // An overflow of align_size_up() would have been caught above + // in the calculation of size_of_reserve. 
+ char * next_alloc_addr = p_buf; + HANDLE hProc = GetCurrentProcess(); + +#ifdef ASSERT + // Variable for the failure injection + long ran_num = os::random(); + size_t fail_after = ran_num % bytes; +#endif + + int count=0; + while (bytes_remaining) { + // select bytes_to_rq to get to the next chunk_size boundary + + size_t bytes_to_rq = MIN2(bytes_remaining, chunk_size - ((size_t)next_alloc_addr % chunk_size)); + // Note allocate and commit + char * p_new; + +#ifdef ASSERT + bool inject_error_now = should_inject_error && (bytes_remaining <= fail_after); +#else + const bool inject_error_now = false; +#endif + + if (inject_error_now) { + p_new = NULL; + } else { + if (!UseNUMAInterleaving) { + p_new = (char *) VirtualAlloc(next_alloc_addr, + bytes_to_rq, + flags, + prot); + } else { + // get the next node to use from the used_node_list + assert(numa_node_list_holder.get_count() > 0, "Multiple NUMA nodes expected"); + DWORD node = numa_node_list_holder.get_node_list_entry(count % numa_node_list_holder.get_count()); + p_new = (char *)os::Kernel32Dll::VirtualAllocExNuma(hProc, + next_alloc_addr, + bytes_to_rq, + flags, + prot, + node); + } + } + + if (p_new == NULL) { + // Free any allocated pages + if (next_alloc_addr > p_buf) { + // Some memory was committed so release it. 
+ size_t bytes_to_release = bytes - bytes_remaining; + os::release_memory(p_buf, bytes_to_release); + } +#ifdef ASSERT + if (should_inject_error) { + if (TracePageSizes && Verbose) { + tty->print_cr("Reserving pages individually failed."); + } + } +#endif + return NULL; + } + bytes_remaining -= bytes_to_rq; + next_alloc_addr += bytes_to_rq; + count++; + } + // made it this far, success + return p_buf; +} + + + void os::large_page_init() { if (!UseLargePages) return; @@ -2722,9 +2920,30 @@ assert((size_t)addr % os::vm_allocation_granularity() == 0, "reserve alignment"); assert(bytes % os::vm_allocation_granularity() == 0, "reserve block size"); - char* res = (char*)VirtualAlloc(addr, bytes, MEM_RESERVE, PAGE_READWRITE); + char* res; + // note that if UseLargePages is on, all the areas that require interleaving + // will go thru reserve_memory_special rather than thru here. + bool use_individual = (UseNUMAInterleaving && !UseLargePages); + if (!use_individual) { + res = (char*)VirtualAlloc(addr, bytes, MEM_RESERVE, PAGE_READWRITE); + } else { + elapsedTimer reserveTimer; + if( Verbose && PrintMiscellaneous ) reserveTimer.start(); + // in numa interleaving, we have to allocate pages individually + // (well really chunks of NUMAInterleaveGranularity size) + res = allocate_pages_individually(bytes, addr, MEM_RESERVE, PAGE_READWRITE); + if (res == NULL) { + warning("NUMA page allocation failed"); + } + if( Verbose && PrintMiscellaneous ) { + reserveTimer.stop(); + tty->print_cr("reserve_memory of %Ix bytes took %ld ms (%ld ticks)", bytes, + reserveTimer.milliseconds(), reserveTimer.ticks()); + } + } assert(res == NULL || addr == NULL || addr == res, "Unexpected address from reserve."); + return res; } @@ -2754,92 +2973,27 @@ char* os::reserve_memory_special(size_t bytes, char* addr, bool exec) { const DWORD prot = exec ? 
PAGE_EXECUTE_READWRITE : PAGE_READWRITE; - - if (UseLargePagesIndividualAllocation) { + const DWORD flags = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES; + + // with large pages, there are two cases where we need to use Individual Allocation + // 1) the UseLargePagesIndividualAllocation flag is set (set by default on WS2003) + // 2) NUMA Interleaving is enabled, in which case we use a different node for each page + if (UseLargePagesIndividualAllocation || UseNUMAInterleaving) { if (TracePageSizes && Verbose) { tty->print_cr("Reserving large pages individually."); } - char * p_buf; - // first reserve enough address space in advance since we want to be - // able to break a single contiguous virtual address range into multiple - // large page commits but WS2003 does not allow reserving large page space - // so we just use 4K pages for reserve, this gives us a legal contiguous - // address space. then we will deallocate that reservation, and re alloc - // using large pages - const size_t size_of_reserve = bytes + _large_page_size; - if (bytes > size_of_reserve) { - // Overflowed. - warning("Individually allocated large pages failed, " - "use -XX:-UseLargePagesIndividualAllocation to turn off"); + char * p_buf = allocate_pages_individually(bytes, addr, flags, prot, LargePagesIndividualAllocationInjectError); + if (p_buf == NULL) { + // give an appropriate warning message + if (UseNUMAInterleaving) { + warning("NUMA large page allocation failed, UseLargePages flag ignored"); + } + if (UseLargePagesIndividualAllocation) { + warning("Individually allocated large pages failed, " + "use -XX:-UseLargePagesIndividualAllocation to turn off"); + } return NULL; } - p_buf = (char *) VirtualAlloc(addr, - size_of_reserve, // size of Reserve - MEM_RESERVE, - PAGE_READWRITE); - // If reservation failed, return NULL - if (p_buf == NULL) return NULL; - - release_memory(p_buf, bytes + _large_page_size); - // round up to page boundary. 
If the size_of_reserve did not - // overflow and the reservation did not fail, this align up - // should not overflow. - p_buf = (char *) align_size_up((size_t)p_buf, _large_page_size); - - // now go through and allocate one page at a time until all bytes are - // allocated - size_t bytes_remaining = align_size_up(bytes, _large_page_size); - // An overflow of align_size_up() would have been caught above - // in the calculation of size_of_reserve. - char * next_alloc_addr = p_buf; - -#ifdef ASSERT - // Variable for the failure injection - long ran_num = os::random(); - size_t fail_after = ran_num % bytes; -#endif - - while (bytes_remaining) { - size_t bytes_to_rq = MIN2(bytes_remaining, _large_page_size); - // Note allocate and commit - char * p_new; - -#ifdef ASSERT - bool inject_error = LargePagesIndividualAllocationInjectError && - (bytes_remaining <= fail_after); -#else - const bool inject_error = false; -#endif - - if (inject_error) { - p_new = NULL; - } else { - p_new = (char *) VirtualAlloc(next_alloc_addr, - bytes_to_rq, - MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, - prot); - } - - if (p_new == NULL) { - // Free any allocated pages - if (next_alloc_addr > p_buf) { - // Some memory was committed so release it. - size_t bytes_to_release = bytes - bytes_remaining; - release_memory(p_buf, bytes_to_release); - } -#ifdef ASSERT - if (UseLargePagesIndividualAllocation && - LargePagesIndividualAllocationInjectError) { - if (TracePageSizes && Verbose) { - tty->print_cr("Reserving large pages individually failed."); - } - } -#endif - return NULL; - } - bytes_remaining -= bytes_to_rq; - next_alloc_addr += bytes_to_rq; - } return p_buf; @@ -2867,14 +3021,43 @@ assert(bytes % os::vm_page_size() == 0, "commit in page-sized chunks"); // Don't attempt to print anything if the OS call fails. We're // probably low on resources, so the print itself may cause crashes. 
- bool result = VirtualAlloc(addr, bytes, MEM_COMMIT, PAGE_READWRITE) != 0; - if (result != NULL && exec) { - DWORD oldprot; - // Windows doc says to use VirtualProtect to get execute permissions - return VirtualProtect(addr, bytes, PAGE_EXECUTE_READWRITE, &oldprot) != 0; + + // unless we have NUMAInterleaving enabled, the range of a commit + // is always within a reserve covered by a single VirtualAlloc + // in that case we can just do a single commit for the requested size + if (!UseNUMAInterleaving) { + if (VirtualAlloc(addr, bytes, MEM_COMMIT, PAGE_READWRITE) == NULL) return false; + if (exec) { + DWORD oldprot; + // Windows doc says to use VirtualProtect to get execute permissions + if (!VirtualProtect(addr, bytes, PAGE_EXECUTE_READWRITE, &oldprot)) return false; + } + return true; } else { - return result; - } + + // when NUMAInterleaving is enabled, the commit might cover a range that + // came from multiple VirtualAlloc reserves (using allocate_pages_individually). + // VirtualQuery can help us determine that. The RegionSize that VirtualQuery + // returns represents the number of bytes that can be committed in one step. 
+ size_t bytes_remaining = bytes; + char * next_alloc_addr = addr; + while (bytes_remaining > 0) { + MEMORY_BASIC_INFORMATION alloc_info; + VirtualQuery(next_alloc_addr, &alloc_info, sizeof(alloc_info)); + size_t bytes_to_rq = MIN2(bytes_remaining, (size_t)alloc_info.RegionSize); + if (VirtualAlloc(next_alloc_addr, bytes_to_rq, MEM_COMMIT, PAGE_READWRITE) == NULL) + return false; + if (exec) { + DWORD oldprot; + if (!VirtualProtect(next_alloc_addr, bytes_to_rq, PAGE_EXECUTE_READWRITE, &oldprot)) + return false; + } + bytes_remaining -= bytes_to_rq; + next_alloc_addr += bytes_to_rq; + } + } + // if we made it this far, return true + return true; } bool os::commit_memory(char* addr, size_t size, size_t alignment_hint, @@ -2948,14 +3131,21 @@ void os::numa_make_global(char *addr, size_t bytes) { } void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) { } bool os::numa_topology_changed() { return false; } -size_t os::numa_get_groups_num() { return 1; } +size_t os::numa_get_groups_num() { return MAX2(numa_node_list_holder.get_count(), 1); } int os::numa_get_group_id() { return 0; } size_t os::numa_get_leaf_groups(int *ids, size_t size) { - if (size > 0) { + if (numa_node_list_holder.get_count() == 0 && size > 0) { + // Provide an answer for UMA systems ids[0] = 0; return 1; - } - return 0; + } else { + // check for size bigger than actual groups_num + size = MIN2(size, numa_get_groups_num()); + for (int i = 0; i < (int)size; i++) { + ids[i] = numa_node_list_holder.get_node_list_entry(i); + } + return size; + } } bool os::get_page_info(char *start, page_info* info) { @@ -3480,7 +3670,7 @@ if(Verbose && PrintMiscellaneous) tty->print("[Memory Serialize Page address: " INTPTR_FORMAT "]\n", (intptr_t)mem_serialize_page); #endif -} + } os::large_page_init(); @@ -3584,7 +3774,13 @@ prio_init(); if (UseNUMA && !ForceNUMA) { - UseNUMA = false; // Currently unsupported. 
+ UseNUMA = false; // We don't fully support this yet + } + + if (UseNUMAInterleaving) { + // first check whether this Windows OS supports VirtualAllocExNuma, if not ignore this flag + bool success = numa_interleaving_init(); + if (!success) UseNUMAInterleaving = false; } return JNI_OK; @@ -4758,7 +4954,14 @@ // Kernel32 API typedef SIZE_T (WINAPI* GetLargePageMinimum_Fn)(void); +typedef LPVOID (WINAPI *VirtualAllocExNuma_Fn) (HANDLE, LPVOID, SIZE_T, DWORD, DWORD, DWORD); +typedef BOOL (WINAPI *GetNumaHighestNodeNumber_Fn) (PULONG); +typedef BOOL (WINAPI *GetNumaNodeProcessorMask_Fn) (UCHAR, PULONGLONG); + GetLargePageMinimum_Fn os::Kernel32Dll::_GetLargePageMinimum = NULL; +VirtualAllocExNuma_Fn os::Kernel32Dll::_VirtualAllocExNuma = NULL; +GetNumaHighestNodeNumber_Fn os::Kernel32Dll::_GetNumaHighestNodeNumber = NULL; +GetNumaNodeProcessorMask_Fn os::Kernel32Dll::_GetNumaNodeProcessorMask = NULL; BOOL os::Kernel32Dll::initialized = FALSE; SIZE_T os::Kernel32Dll::GetLargePageMinimum() { assert(initialized && _GetLargePageMinimum != NULL, @@ -4773,16 +4976,53 @@ return _GetLargePageMinimum != NULL; } +BOOL os::Kernel32Dll::NumaCallsAvailable() { + if (!initialized) { + initialize(); + } + return _VirtualAllocExNuma != NULL; +} + +LPVOID os::Kernel32Dll::VirtualAllocExNuma(HANDLE hProc, LPVOID addr, SIZE_T bytes, DWORD flags, DWORD prot, DWORD node) { + assert(initialized && _VirtualAllocExNuma != NULL, + "NUMACallsAvailable() not yet called"); + + return _VirtualAllocExNuma(hProc, addr, bytes, flags, prot, node); +} + +BOOL os::Kernel32Dll::GetNumaHighestNodeNumber(PULONG ptr_highest_node_number) { + assert(initialized && _GetNumaHighestNodeNumber != NULL, + "NUMACallsAvailable() not yet called"); + + return _GetNumaHighestNodeNumber(ptr_highest_node_number); +} + +BOOL os::Kernel32Dll::GetNumaNodeProcessorMask(UCHAR node, PULONGLONG proc_mask) { + assert(initialized && _GetNumaNodeProcessorMask != NULL, + "NUMACallsAvailable() not yet called"); + + return 
_GetNumaNodeProcessorMask(node, proc_mask); +} + + +void os::Kernel32Dll::initializeCommon() { + if (!initialized) { + HMODULE handle = ::GetModuleHandle("Kernel32.dll"); + assert(handle != NULL, "Just check"); + _GetLargePageMinimum = (GetLargePageMinimum_Fn)::GetProcAddress(handle, "GetLargePageMinimum"); + _VirtualAllocExNuma = (VirtualAllocExNuma_Fn)::GetProcAddress(handle, "VirtualAllocExNuma"); + _GetNumaHighestNodeNumber = (GetNumaHighestNodeNumber_Fn)::GetProcAddress(handle, "GetNumaHighestNodeNumber"); + _GetNumaNodeProcessorMask = (GetNumaNodeProcessorMask_Fn)::GetProcAddress(handle, "GetNumaNodeProcessorMask"); + initialized = TRUE; + } +} + + #ifndef JDK6_OR_EARLIER void os::Kernel32Dll::initialize() { - if (!initialized) { - HMODULE handle = ::GetModuleHandle("Kernel32.dll"); - assert(handle != NULL, "Just check"); - _GetLargePageMinimum = (GetLargePageMinimum_Fn)::GetProcAddress(handle, "GetLargePageMinimum"); - initialized = TRUE; - } + initializeCommon(); } @@ -4887,18 +5127,19 @@ Module32Next_Fn os::Kernel32Dll::_Module32Next = NULL; GetNativeSystemInfo_Fn os::Kernel32Dll::_GetNativeSystemInfo = NULL; + void os::Kernel32Dll::initialize() { if (!initialized) { HMODULE handle = ::GetModuleHandle("Kernel32.dll"); assert(handle != NULL, "Just check"); _SwitchToThread = (SwitchToThread_Fn)::GetProcAddress(handle, "SwitchToThread"); - _GetLargePageMinimum = (GetLargePageMinimum_Fn)::GetProcAddress(handle, "GetLargePageMinimum"); _CreateToolhelp32Snapshot = (CreateToolhelp32Snapshot_Fn) ::GetProcAddress(handle, "CreateToolhelp32Snapshot"); _Module32First = (Module32First_Fn)::GetProcAddress(handle, "Module32First"); _Module32Next = (Module32Next_Fn)::GetProcAddress(handle, "Module32Next"); _GetNativeSystemInfo = (GetNativeSystemInfo_Fn)::GetProcAddress(handle, "GetNativeSystemInfo"); + initializeCommon(); // resolve the functions that always need resolving initialized = TRUE; } @@ -4964,6 +5205,8 @@ _GetNativeSystemInfo(lpSystemInfo); } + + // PSAPI API 
diff -r 8f47d8870d9a -r 5257f8e66b40 src/os/windows/vm/os_windows.hpp --- a/src/os/windows/vm/os_windows.hpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/os/windows/vm/os_windows.hpp Fri Sep 09 12:44:37 2011 -0700 @@ -173,13 +173,25 @@ static BOOL GetNativeSystemInfoAvailable(); static void GetNativeSystemInfo(LPSYSTEM_INFO); + // NUMA calls + static BOOL NumaCallsAvailable(); + static LPVOID VirtualAllocExNuma(HANDLE, LPVOID, SIZE_T, DWORD, DWORD, DWORD); + static BOOL GetNumaHighestNodeNumber(PULONG); + static BOOL GetNumaNodeProcessorMask(UCHAR, PULONGLONG); + private: // GetLargePageMinimum available on Windows Vista/Windows Server 2003 // and later + // NUMA calls available Windows Vista/WS2008 and later + static SIZE_T (WINAPI *_GetLargePageMinimum)(void); + static LPVOID (WINAPI *_VirtualAllocExNuma) (HANDLE, LPVOID, SIZE_T, DWORD, DWORD, DWORD); + static BOOL (WINAPI *_GetNumaHighestNodeNumber) (PULONG); + static BOOL (WINAPI *_GetNumaNodeProcessorMask) (UCHAR, PULONGLONG); static BOOL initialized; static void initialize(); + static void initializeCommon(); #ifdef JDK6_OR_EARLIER private: diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/tools/ProjectCreator/WinGammaPlatformVC10.java --- a/src/share/tools/ProjectCreator/WinGammaPlatformVC10.java Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/tools/ProjectCreator/WinGammaPlatformVC10.java Fri Sep 09 12:44:37 2011 -0700 @@ -482,7 +482,7 @@ "/export:JVM_GetThreadStateNames "+ "/export:JVM_GetThreadStateValues "+ "/export:JVM_InitAgentProperties"); - addAttr(rv, "AdditionalDependencies", "kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;Wsock32.lib;winmm.lib"); + addAttr(rv, "AdditionalDependencies", "kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;Wsock32.lib;winmm.lib;psapi.lib"); addAttr(rv, "OutputFile", outDll); addAttr(rv, "SuppressStartupBanner", "true"); addAttr(rv, 
"ModuleDefinitionFile", outDir+Util.sep+"vm.def"); diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/g1/collectionSetChooser.cpp --- a/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,6 +26,7 @@ #include "gc_implementation/g1/collectionSetChooser.hpp" #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" #include "gc_implementation/g1/g1CollectorPolicy.hpp" +#include "gc_implementation/g1/g1ErgoVerbose.hpp" #include "memory/space.inline.hpp" CSetChooserCache::CSetChooserCache() { @@ -358,6 +359,9 @@ if (_cache.is_empty()) { assert(_curMarkedIndex == _numMarkedRegions, "if cache is empty, list should also be empty"); + ergo_verbose0(ErgoCSetConstruction, + "stop adding old regions to CSet", + ergo_format_reason("cache is empty")); return NULL; } @@ -368,10 +372,23 @@ if (g1p->adaptive_young_list_length()) { if (time_remaining - predicted_time < 0.0) { g1h->check_if_region_is_too_expensive(predicted_time); + ergo_verbose2(ErgoCSetConstruction, + "stop adding old regions to CSet", + ergo_format_reason("predicted old region time higher than remaining time") + ergo_format_ms("predicted old region time") + ergo_format_ms("remaining time"), + predicted_time, time_remaining); return NULL; } } else { - if (predicted_time > 2.0 * avg_prediction) { + double threshold = 2.0 * avg_prediction; + if (predicted_time > threshold) { + ergo_verbose2(ErgoCSetConstruction, + "stop adding old regions to CSet", + ergo_format_reason("predicted old region time higher than threshold") + ergo_format_ms("predicted 
old region time") + ergo_format_ms("threshold"), + predicted_time, threshold); return NULL; } } diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -91,7 +91,7 @@ } } - g1p->check_prediction_validity(); + g1p->revise_young_list_target_length_if_necessary(); } } diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/g1/concurrentMark.cpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -28,6 +28,7 @@ #include "gc_implementation/g1/concurrentMarkThread.inline.hpp" #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" #include "gc_implementation/g1/g1CollectorPolicy.hpp" +#include "gc_implementation/g1/g1ErgoVerbose.hpp" #include "gc_implementation/g1/g1OopClosures.inline.hpp" #include "gc_implementation/g1/g1RemSet.hpp" #include "gc_implementation/g1/heapRegionRemSet.hpp" @@ -801,39 +802,6 @@ reset(); } -class CMMarkRootsClosure: public OopsInGenClosure { -private: - ConcurrentMark* _cm; - G1CollectedHeap* _g1h; - bool _do_barrier; - -public: - CMMarkRootsClosure(ConcurrentMark* cm, - G1CollectedHeap* g1h, - bool do_barrier) : _cm(cm), _g1h(g1h), - _do_barrier(do_barrier) { } - - virtual void do_oop(narrowOop* p) { do_oop_work(p); } - virtual void do_oop( oop* p) { do_oop_work(p); } - - template void do_oop_work(T* p) { - T heap_oop = oopDesc::load_heap_oop(p); - if (!oopDesc::is_null(heap_oop)) { - oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); - assert(obj->is_oop() || obj->mark() == NULL, - "expected an oop, possibly with mark word displaced"); - HeapWord* addr = (HeapWord*)obj; - if (_g1h->is_in_g1_reserved(addr)) { - _cm->grayRoot(obj); - } - } - if 
(_do_barrier) { - assert(!_g1h->is_in_g1_reserved(p), - "Should be called on external roots"); - do_barrier(p); - } - } -}; void ConcurrentMark::checkpointRootsInitialPost() { G1CollectedHeap* g1h = G1CollectedHeap::heap(); @@ -868,50 +836,6 @@ // during it. No need to call it here. } -// Checkpoint the roots into this generation from outside -// this generation. [Note this initial checkpoint need only -// be approximate -- we'll do a catch up phase subsequently.] -void ConcurrentMark::checkpointRootsInitial() { - assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped"); - G1CollectedHeap* g1h = G1CollectedHeap::heap(); - - double start = os::elapsedTime(); - - G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); - g1p->record_concurrent_mark_init_start(); - checkpointRootsInitialPre(); - - // YSR: when concurrent precleaning is in place, we'll - // need to clear the cached card table here - - ResourceMark rm; - HandleMark hm; - - g1h->ensure_parsability(false); - g1h->perm_gen()->save_marks(); - - CMMarkRootsClosure notOlder(this, g1h, false); - CMMarkRootsClosure older(this, g1h, true); - - g1h->set_marking_started(); - g1h->rem_set()->prepare_for_younger_refs_iterate(false); - - g1h->process_strong_roots(true, // activate StrongRootsScope - false, // fake perm gen collection - SharedHeap::SO_AllClasses, - ¬Older, // Regular roots - NULL, // do not visit active blobs - &older // Perm Gen Roots - ); - checkpointRootsInitialPost(); - - // Statistics. - double end = os::elapsedTime(); - _init_times.add((end - start) * 1000.0); - - g1p->record_concurrent_mark_init_end(); -} - /* * Notice that in the next two methods, we actually leave the STS * during the barrier sync and join it immediately afterwards. 
If we @@ -1804,18 +1728,21 @@ size_t known_garbage_bytes = g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes(); -#if 0 - gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf", - (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024), - (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024), - (double) known_garbage_bytes / (double) (1024 * 1024)); -#endif // 0 g1p->set_known_garbage_bytes(known_garbage_bytes); size_t start_used_bytes = g1h->used(); _at_least_one_mark_complete = true; g1h->set_marking_complete(); + ergo_verbose4(ErgoConcCycles, + "finish cleanup", + ergo_format_byte("occupancy") + ergo_format_byte("capacity") + ergo_format_byte_perc("known garbage"), + start_used_bytes, g1h->capacity(), + known_garbage_bytes, + ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0); + double count_end = os::elapsedTime(); double this_final_counting_time = (count_end - start); if (G1PrintParCleanupStats) { diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/g1/concurrentMark.hpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp Fri Sep 09 12:44:37 2011 -0700 @@ -756,9 +756,6 @@ // Clear the next marking bitmap (will be called concurrently). void clearNextBitmap(); - // main CMS steps and related support - void checkpointRootsInitial(); - // These two do the work that needs to be done before and after the // initial root checkpoint. Since this checkpoint can be done at two // different points (i.e. 
an explicit pause or piggy-backed on a diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp --- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -50,19 +50,6 @@ create_and_start(); } -class CMCheckpointRootsInitialClosure: public VoidClosure { - - ConcurrentMark* _cm; -public: - - CMCheckpointRootsInitialClosure(ConcurrentMark* cm) : - _cm(cm) {} - - void do_void(){ - _cm->checkpointRootsInitial(); - } -}; - class CMCheckpointRootsFinalClosure: public VoidClosure { ConcurrentMark* _cm; @@ -116,27 +103,6 @@ gclog_or_tty->print_cr("[GC concurrent-mark-start]"); } - if (!g1_policy->in_young_gc_mode()) { - // this ensures the flag is not set if we bail out of the marking - // cycle; normally the flag is cleared immediately after cleanup - g1h->set_marking_complete(); - - if (g1_policy->adaptive_young_list_length()) { - double now = os::elapsedTime(); - double init_prediction_ms = g1_policy->predict_init_time_ms(); - jlong sleep_time_ms = mmu_tracker->when_ms(now, init_prediction_ms); - os::sleep(current_thread, sleep_time_ms, false); - } - - // We don't have to skip here if we've been asked to restart, because - // in the worst case we just enqueue a new VM operation to start a - // marking. 
Note that the init operation resets has_aborted() - CMCheckpointRootsInitialClosure init_cl(_cm); - strcpy(verbose_str, "GC initial-mark"); - VM_CGC_Operation op(&init_cl, verbose_str); - VMThread::execute(&op); - } - int iter = 0; do { iter++; diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -31,6 +31,7 @@ #include "gc_implementation/g1/g1AllocRegion.inline.hpp" #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" #include "gc_implementation/g1/g1CollectorPolicy.hpp" +#include "gc_implementation/g1/g1ErgoVerbose.hpp" #include "gc_implementation/g1/g1MarkSweep.hpp" #include "gc_implementation/g1/g1OopClosures.inline.hpp" #include "gc_implementation/g1/g1RemSet.inline.hpp" @@ -577,6 +578,11 @@ res = new_region_try_secondary_free_list(); } if (res == NULL && do_expand) { + ergo_verbose1(ErgoHeapSizing, + "attempt heap expansion", + ergo_format_reason("region allocation request failed") + ergo_format_byte("allocation request"), + word_size * HeapWordSize); if (expand(word_size * HeapWordSize)) { // Even though the heap was expanded, it might not have reached // the desired size. So, we cannot assume that the allocation @@ -790,6 +796,11 @@ // room available. assert(num_regions > fs, "earlier allocation should have succeeded"); + ergo_verbose1(ErgoHeapSizing, + "attempt heap expansion", + ergo_format_reason("humongous allocation request failed") + ergo_format_byte("allocation request"), + word_size * HeapWordSize); if (expand((num_regions - fs) * HeapRegion::GrainBytes)) { // Even though the heap was expanded, it might not have // reached the desired size. 
So, we cannot assume that the @@ -906,6 +917,8 @@ if (GC_locker::is_active_and_needs_gc()) { if (g1_policy()->can_expand_young_list()) { + // No need for an ergo verbose message here, + // can_expand_young_list() does this when it returns true. result = _mutator_alloc_region.attempt_allocation_force(word_size, false /* bot_updates */); if (result != NULL) { @@ -1227,6 +1240,7 @@ /* option */ VerifyOption_G1UsePrevMarking); } + pre_full_gc_dump(); COMPILER2_PRESENT(DerivedPointerTable::clear()); @@ -1263,10 +1277,8 @@ g1_policy()->clear_incremental_cset(); g1_policy()->stop_incremental_cset_building(); - if (g1_policy()->in_young_gc_mode()) { - empty_young_list(); - g1_policy()->set_full_young_gcs(true); - } + empty_young_list(); + g1_policy()->set_full_young_gcs(true); // See the comment in G1CollectedHeap::ref_processing_init() about // how reference processing currently works in G1. @@ -1387,13 +1399,11 @@ || (G1DeferredRSUpdate && (dirty_card_queue_set().completed_buffers_num() == 0)), "Should not be any"); } - if (g1_policy()->in_young_gc_mode()) { - _young_list->reset_sampled_info(); - // At this point there should be no regions in the - // entire heap tagged as young. - assert( check_young_list_empty(true /* check_heap */), - "young list should be empty at this point"); - } + _young_list->reset_sampled_info(); + // At this point there should be no regions in the + // entire heap tagged as young. + assert( check_young_list_empty(true /* check_heap */), + "young list should be empty at this point"); // Update the number of full collections that have been completed. increment_full_collections_completed(false /* concurrent */); @@ -1405,6 +1415,7 @@ Universe::print_heap_after_gc(); } g1mm()->update_counters(); + post_full_gc_dump(); return true; } @@ -1479,63 +1490,34 @@ // we'll try to make the capacity smaller than it, not greater). 
maximum_desired_capacity = MAX2(maximum_desired_capacity, min_heap_size); - if (PrintGC && Verbose) { - const double free_percentage = - (double) free_after_gc / (double) capacity_after_gc; - gclog_or_tty->print_cr("Computing new size after full GC "); - gclog_or_tty->print_cr(" " - " minimum_free_percentage: %6.2f", - minimum_free_percentage); - gclog_or_tty->print_cr(" " - " maximum_free_percentage: %6.2f", - maximum_free_percentage); - gclog_or_tty->print_cr(" " - " capacity: %6.1fK" - " minimum_desired_capacity: %6.1fK" - " maximum_desired_capacity: %6.1fK", - (double) capacity_after_gc / (double) K, - (double) minimum_desired_capacity / (double) K, - (double) maximum_desired_capacity / (double) K); - gclog_or_tty->print_cr(" " - " free_after_gc: %6.1fK" - " used_after_gc: %6.1fK", - (double) free_after_gc / (double) K, - (double) used_after_gc / (double) K); - gclog_or_tty->print_cr(" " - " free_percentage: %6.2f", - free_percentage); - } if (capacity_after_gc < minimum_desired_capacity) { // Don't expand unless it's significant size_t expand_bytes = minimum_desired_capacity - capacity_after_gc; - if (expand(expand_bytes)) { - if (PrintGC && Verbose) { - gclog_or_tty->print_cr(" " - " expanding:" - " max_heap_size: %6.1fK" - " minimum_desired_capacity: %6.1fK" - " expand_bytes: %6.1fK", - (double) max_heap_size / (double) K, - (double) minimum_desired_capacity / (double) K, - (double) expand_bytes / (double) K); - } - } + ergo_verbose4(ErgoHeapSizing, + "attempt heap expansion", + ergo_format_reason("capacity lower than " + "min desired capacity after Full GC") + ergo_format_byte("capacity") + ergo_format_byte("occupancy") + ergo_format_byte_perc("min desired capacity"), + capacity_after_gc, used_after_gc, + minimum_desired_capacity, (double) MinHeapFreeRatio); + expand(expand_bytes); // No expansion, now see if we want to shrink } else if (capacity_after_gc > maximum_desired_capacity) { // Capacity too large, compute shrinking size size_t shrink_bytes = 
capacity_after_gc - maximum_desired_capacity; + ergo_verbose4(ErgoHeapSizing, + "attempt heap shrinking", + ergo_format_reason("capacity higher than " + "max desired capacity after Full GC") + ergo_format_byte("capacity") + ergo_format_byte("occupancy") + ergo_format_byte_perc("max desired capacity"), + capacity_after_gc, used_after_gc, + maximum_desired_capacity, (double) MaxHeapFreeRatio); shrink(shrink_bytes); - if (PrintGC && Verbose) { - gclog_or_tty->print_cr(" " - " shrinking:" - " min_heap_size: %6.1fK" - " maximum_desired_capacity: %6.1fK" - " shrink_bytes: %6.1fK", - (double) min_heap_size / (double) K, - (double) maximum_desired_capacity / (double) K, - (double) shrink_bytes / (double) K); - } } } @@ -1621,6 +1603,11 @@ verify_region_sets_optional(); size_t expand_bytes = MAX2(word_size * HeapWordSize, MinHeapDeltaBytes); + ergo_verbose1(ErgoHeapSizing, + "attempt heap expansion", + ergo_format_reason("allocation request failed") + ergo_format_byte("allocation request"), + word_size * HeapWordSize); if (expand(expand_bytes)) { _hrs.verify_optional(); verify_region_sets_optional(); @@ -1648,11 +1635,11 @@ size_t aligned_expand_bytes = ReservedSpace::page_align_size_up(expand_bytes); aligned_expand_bytes = align_size_up(aligned_expand_bytes, HeapRegion::GrainBytes); - - if (Verbose && PrintGC) { - gclog_or_tty->print("Expanding garbage-first heap from %ldK by %ldK", - old_mem_size/K, aligned_expand_bytes/K); - } + ergo_verbose2(ErgoHeapSizing, + "expand the heap", + ergo_format_byte("requested expansion amount") + ergo_format_byte("attempted expansion amount"), + expand_bytes, aligned_expand_bytes); // First commit the memory. 
HeapWord* old_end = (HeapWord*) _g1_storage.high(); @@ -1695,7 +1682,11 @@ } assert(curr == mr.end(), "post-condition"); } + g1_policy()->record_new_heap_size(n_regions()); } else { + ergo_verbose0(ErgoHeapSizing, + "did not expand the heap", + ergo_format_reason("heap expansion operation failed")); // The expansion of the virtual storage space was unsuccessful. // Let's see if it was because we ran out of swap. if (G1ExitOnExpansionFailure && @@ -1704,13 +1695,6 @@ vm_exit_out_of_memory(aligned_expand_bytes, "G1 heap expansion"); } } - - if (Verbose && PrintGC) { - size_t new_mem_size = _g1_storage.committed_size(); - gclog_or_tty->print_cr("...%s, expanded to %ldK", - (successful ? "Successful" : "Failed"), - new_mem_size/K); - } return successful; } @@ -1724,6 +1708,13 @@ MemRegion mr = _hrs.shrink_by(aligned_shrink_bytes, &num_regions_deleted); HeapWord* old_end = (HeapWord*) _g1_storage.high(); assert(mr.end() == old_end, "post-condition"); + + ergo_verbose3(ErgoHeapSizing, + "shrink the heap", + ergo_format_byte("requested shrinking amount") + ergo_format_byte("aligned shrinking amount") + ergo_format_byte("attempted shrinking amount"), + shrink_bytes, aligned_shrink_bytes, mr.byte_size()); if (mr.byte_size() > 0) { if (_hr_printer.is_active()) { HeapWord* curr = mr.end(); @@ -1742,13 +1733,11 @@ _expansion_regions += num_regions_deleted; update_committed_space(old_end, new_end); HeapRegionRemSet::shrink_heap(n_regions()); - - if (Verbose && PrintGC) { - size_t new_mem_size = _g1_storage.committed_size(); - gclog_or_tty->print_cr("Shrinking garbage-first heap from %ldK by %ldK to %ldK", - old_mem_size/K, aligned_shrink_bytes/K, - new_mem_size/K); - } + g1_policy()->record_new_heap_size(n_regions()); + } else { + ergo_verbose0(ErgoHeapSizing, + "did not shrink the heap", + ergo_format_reason("heap shrinking operation failed")); } } @@ -3161,12 +3150,6 @@ } } -void G1CollectedHeap::do_sync_mark() { - _cm->checkpointRootsInitial(); - _cm->markFromRoots(); - 
_cm->checkpointRootsFinal(false); -} - // double G1CollectedHeap::predict_region_elapsed_time_ms(HeapRegion *hr, @@ -3317,11 +3300,10 @@ char verbose_str[128]; sprintf(verbose_str, "GC pause "); - if (g1_policy()->in_young_gc_mode()) { - if (g1_policy()->full_young_gcs()) - strcat(verbose_str, "(young)"); - else - strcat(verbose_str, "(partial)"); + if (g1_policy()->full_young_gcs()) { + strcat(verbose_str, "(young)"); + } else { + strcat(verbose_str, "(partial)"); } if (g1_policy()->during_initial_mark_pause()) { strcat(verbose_str, " (initial-mark)"); @@ -3350,10 +3332,8 @@ append_secondary_free_list_if_not_empty_with_lock(); } - if (g1_policy()->in_young_gc_mode()) { - assert(check_young_list_well_formed(), - "young list should be well formed"); - } + assert(check_young_list_well_formed(), + "young list should be well formed"); { // Call to jvmpi::post_class_unload_events must occur outside of active GC IsGCActiveMark x; @@ -3494,27 +3474,25 @@ // evacuation pause. clear_cset_fast_test(); - if (g1_policy()->in_young_gc_mode()) { - _young_list->reset_sampled_info(); - - // Don't check the whole heap at this point as the - // GC alloc regions from this pause have been tagged - // as survivors and moved on to the survivor list. - // Survivor regions will fail the !is_young() check. - assert(check_young_list_empty(false /* check_heap */), - "young list should be empty"); + _young_list->reset_sampled_info(); + + // Don't check the whole heap at this point as the + // GC alloc regions from this pause have been tagged + // as survivors and moved on to the survivor list. + // Survivor regions will fail the !is_young() check. 
+ assert(check_young_list_empty(false /* check_heap */), + "young list should be empty"); #if YOUNG_LIST_VERBOSE - gclog_or_tty->print_cr("Before recording survivors.\nYoung List:"); - _young_list->print(); + gclog_or_tty->print_cr("Before recording survivors.\nYoung List:"); + _young_list->print(); #endif // YOUNG_LIST_VERBOSE - g1_policy()->record_survivor_regions(_young_list->survivor_length(), - _young_list->first_survivor_region(), - _young_list->last_survivor_region()); - - _young_list->reset_auxilary_lists(); - } + g1_policy()->record_survivor_regions(_young_list->survivor_length(), + _young_list->first_survivor_region(), + _young_list->last_survivor_region()); + + _young_list->reset_auxilary_lists(); if (evacuation_failed()) { _summary_bytes_used = recalculate_used(); @@ -3524,8 +3502,7 @@ _summary_bytes_used += g1_policy()->bytes_copied_during_gc(); } - if (g1_policy()->in_young_gc_mode() && - g1_policy()->during_initial_mark_pause()) { + if (g1_policy()->during_initial_mark_pause()) { concurrent_mark()->checkpointRootsInitialPost(); set_marking_started(); // CAUTION: after the doConcurrentMark() call below, @@ -3548,6 +3525,19 @@ init_mutator_alloc_region(); + { + size_t expand_bytes = g1_policy()->expansion_amount(); + if (expand_bytes > 0) { + size_t bytes_before = capacity(); + if (!expand(expand_bytes)) { + // We failed to expand the heap so let's verify that + // committed/uncommitted amount match the backing store + assert(capacity() == _g1_storage.committed_size(), "committed size mismatch"); + assert(max_capacity() == _g1_storage.reserved_size(), "reserved size mismatch"); + } + } + } + double end_time_sec = os::elapsedTime(); double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS; g1_policy()->record_pause_time_ms(pause_time_ms); @@ -3593,6 +3583,8 @@ size_t expand_bytes = g1_policy()->expansion_amount(); if (expand_bytes > 0) { size_t bytes_before = capacity(); + // No need for an ergo verbose message here, + // expansion_amount() 
does this when it returns a value > 0. if (!expand(expand_bytes)) { // We failed to expand the heap so let's verify that // committed/uncommitted amount match the backing store @@ -3746,13 +3738,6 @@ bool do_object_b(oop p) { // It is reachable if it is outside the collection set, or is inside // and forwarded. - -#ifdef G1_DEBUG - gclog_or_tty->print_cr("is alive "PTR_FORMAT" in CS %d forwarded %d overall %d", - (void*) p, _g1->obj_in_cs(p), p->is_forwarded(), - !_g1->obj_in_cs(p) || p->is_forwarded()); -#endif // G1_DEBUG - return !_g1->obj_in_cs(p) || p->is_forwarded(); } }; @@ -3764,20 +3749,9 @@ void do_oop(narrowOop* p) { guarantee(false, "Not needed"); } void do_oop( oop* p) { oop obj = *p; -#ifdef G1_DEBUG - if (PrintGC && Verbose) { - gclog_or_tty->print_cr("keep alive *"PTR_FORMAT" = "PTR_FORMAT" "PTR_FORMAT, - p, (void*) obj, (void*) *p); - } -#endif // G1_DEBUG - if (_g1->obj_in_cs(obj)) { assert( obj->is_forwarded(), "invariant" ); *p = obj->forwardee(); -#ifdef G1_DEBUG - gclog_or_tty->print_cr(" in CSet: moved "PTR_FORMAT" -> "PTR_FORMAT, - (void*) obj, (void*) *p); -#endif // G1_DEBUG } } }; @@ -4083,6 +4057,23 @@ } #endif // PRODUCT +G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) : + ParGCAllocBuffer(gclab_word_size), + _should_mark_objects(false), + _bitmap(G1CollectedHeap::heap()->reserved_region().start(), gclab_word_size), + _retired(false) +{ + //_should_mark_objects is set to true when G1ParCopyHelper needs to + // mark the forwarded location of an evacuated object. + // We set _should_mark_objects to true if marking is active, i.e. when we + // need to propagate a mark, or during an initial mark pause, i.e. when we + // need to mark objects immediately reachable by the roots. 
+ if (G1CollectedHeap::heap()->mark_in_progress() || + G1CollectedHeap::heap()->g1_policy()->during_initial_mark_pause()) { + _should_mark_objects = true; + } +} + G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num) : _g1h(g1h), _refs(g1h->task_queue(queue_num)), @@ -4198,12 +4189,14 @@ G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()), - _par_scan_state(par_scan_state) { } - -template void G1ParCopyHelper::mark_forwardee(T* p) { - // This is called _after_ do_oop_work has been called, hence after - // the object has been relocated to its new location and *p points - // to its new location. + _par_scan_state(par_scan_state), + _during_initial_mark(_g1->g1_policy()->during_initial_mark_pause()), + _mark_in_progress(_g1->mark_in_progress()) { } + +template void G1ParCopyHelper::mark_object(T* p) { + // This is called from do_oop_work for objects that are not + // in the collection set. Objects in the collection set + // are marked after they have been evacuated. T heap_oop = oopDesc::load_heap_oop(p); if (!oopDesc::is_null(heap_oop)) { @@ -4215,7 +4208,7 @@ } } -oop G1ParCopyHelper::copy_to_survivor_space(oop old) { +oop G1ParCopyHelper::copy_to_survivor_space(oop old, bool should_mark_copy) { size_t word_sz = old->size(); HeapRegion* from_region = _g1->heap_region_containing_raw(old); // +1 to make the -1 indexes valid... 
@@ -4271,8 +4264,8 @@ obj->set_mark(m); } - // preserve "next" mark bit - if (_g1->mark_in_progress() && !_g1->is_obj_ill(old)) { + // Mark the evacuated object or propagate "next" mark bit + if (should_mark_copy) { if (!use_local_bitmaps || !_par_scan_state->alloc_buffer(alloc_purpose)->mark(obj_ptr)) { // if we couldn't mark it on the local bitmap (this happens when @@ -4280,11 +4273,12 @@ // the bullet and do the standard parallel mark _cm->markAndGrayObjectIfNecessary(obj); } -#if 1 + if (_g1->isMarkedNext(old)) { + // Unmark the object's old location so that marking + // doesn't think the old object is alive. _cm->nextMarkBitMap()->parClear((HeapWord*)old); } -#endif } size_t* surv_young_words = _par_scan_state->surviving_young_words(); @@ -4307,26 +4301,62 @@ return obj; } -template +template template -void G1ParCopyClosure +void G1ParCopyClosure ::do_oop_work(T* p) { oop obj = oopDesc::load_decode_heap_oop(p); assert(barrier != G1BarrierRS || obj != NULL, "Precondition: G1BarrierRS implies obj is nonNull"); + // Marking: + // If the object is in the collection set, then the thread + // that copies the object should mark, or propagate the + // mark to, the evacuated object. + // If the object is not in the collection set then we + // should call the mark_object() method depending on the + // value of the template parameter do_mark_object (which will + // be true for root scanning closures during an initial mark + // pause). + // The mark_object() method first checks whether the object + // is marked and, if not, attempts to mark the object. + // here the null check is implicit in the cset_fast_test() test if (_g1->in_cset_fast_test(obj)) { if (obj->is_forwarded()) { oopDesc::encode_store_heap_oop(p, obj->forwardee()); + // If we are a root scanning closure during an initial + // mark pause (i.e. do_mark_object will be true) then + // we also need to handle marking of roots in the + // event of an evacuation failure. 
In the event of an + // evacuation failure, the object is forwarded to itself + // and not copied so let's mark it here. + if (do_mark_object && obj->forwardee() == obj) { + mark_object(p); + } } else { - oop copy_oop = copy_to_survivor_space(obj); + // We need to mark the copied object if we're a root scanning + // closure during an initial mark pause (i.e. do_mark_object + // will be true), or the object is already marked and we need + // to propagate the mark to the evacuated copy. + bool should_mark_copy = do_mark_object || + _during_initial_mark || + (_mark_in_progress && !_g1->is_obj_ill(obj)); + + oop copy_oop = copy_to_survivor_space(obj, should_mark_copy); oopDesc::encode_store_heap_oop(p, copy_oop); } // When scanning the RS, we only care about objs in CS. if (barrier == G1BarrierRS) { _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num()); } + } else { + // The object is not in collection set. If we're a root scanning + // closure during an initial mark pause (i.e. do_mark_object will + // be true) then attempt to mark the object. + if (do_mark_object) { + mark_object(p); + } } if (barrier == G1BarrierEvac && obj != NULL) { @@ -4571,7 +4601,6 @@ // keep entries (which are added by the marking threads) on them // live until they can be processed at the end of marking. ref_processor()->weak_oops_do(&buf_scan_non_heap_roots); - ref_processor()->oops_do(&buf_scan_non_heap_roots); } // Finish up any enqueued closure apps (attributed as object copy time). 
@@ -5091,7 +5120,6 @@ void G1CollectedHeap::empty_young_list() { assert(heap_lock_held_for_gc(), "the heap lock should already be held by or for this thread"); - assert(g1_policy()->in_young_gc_mode(), "should be in young GC mode"); _young_list->empty_list(); } diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Fri Sep 09 12:44:37 2011 -0700 @@ -1263,16 +1263,10 @@ // in the young gen: for the SATB pre-barrier, there is no // pre-value that needs to be remembered; for the remembered-set // update logging post-barrier, we don't maintain remembered set - // information for young gen objects. Note that non-generational - // G1 does not have any "young" objects, should not elide - // the rs logging barrier and so should always answer false below. - // However, non-generational G1 (-XX:-G1Gen) appears to have - // bit-rotted so was not tested below. + // information for young gen objects. virtual bool can_elide_initializing_store_barrier(oop new_obj) { // Re 6920090, 6920109 above. assert(ReduceInitialCardMarksForG1, "Else cannot be here"); - assert(G1Gen || !is_in_young(new_obj), - "Non-generational G1 should never return true below"); return is_in_young(new_obj); } @@ -1389,9 +1383,6 @@ // bitmap off to the side. void doConcurrentMark(); - // Do a full concurrent marking, synchronously. 
- void do_sync_mark(); - bool isMarkedPrev(oop obj) const; bool isMarkedNext(oop obj) const; @@ -1724,26 +1715,22 @@ class G1ParGCAllocBuffer: public ParGCAllocBuffer { private: bool _retired; - bool _during_marking; + bool _should_mark_objects; GCLabBitMap _bitmap; public: - G1ParGCAllocBuffer(size_t gclab_word_size) : - ParGCAllocBuffer(gclab_word_size), - _during_marking(G1CollectedHeap::heap()->mark_in_progress()), - _bitmap(G1CollectedHeap::heap()->reserved_region().start(), gclab_word_size), - _retired(false) - { } + G1ParGCAllocBuffer(size_t gclab_word_size); inline bool mark(HeapWord* addr) { guarantee(use_local_bitmaps, "invariant"); - assert(_during_marking, "invariant"); + assert(_should_mark_objects, "invariant"); return _bitmap.mark(addr); } inline void set_buf(HeapWord* buf) { - if (use_local_bitmaps && _during_marking) + if (use_local_bitmaps && _should_mark_objects) { _bitmap.set_buffer(buf); + } ParGCAllocBuffer::set_buf(buf); _retired = false; } @@ -1751,7 +1738,7 @@ inline void retire(bool end_of_gc, bool retain) { if (_retired) return; - if (use_local_bitmaps && _during_marking) { + if (use_local_bitmaps && _should_mark_objects) { _bitmap.retire(); } ParGCAllocBuffer::retire(end_of_gc, retain); diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -28,6 +28,7 @@ #include "gc_implementation/g1/concurrentMarkThread.inline.hpp" #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" #include "gc_implementation/g1/g1CollectorPolicy.hpp" +#include "gc_implementation/g1/g1ErgoVerbose.hpp" #include "gc_implementation/g1/heapRegionRemSet.hpp" #include "gc_implementation/shared/gcPolicyCounters.hpp" #include "runtime/arguments.hpp" @@ -145,6 +146,7 @@ _stop_world_start(0.0), _all_stop_world_times_ms(new NumberSeq()), 
_all_yield_times_ms(new NumberSeq()), + _using_new_ratio_calculations(false), _all_mod_union_times_ms(new NumberSeq()), @@ -170,7 +172,6 @@ _cur_aux_times_ms(new double[_aux_num]), _cur_aux_times_set(new bool[_aux_num]), - _concurrent_mark_init_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), _concurrent_mark_remark_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), _concurrent_mark_cleanup_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), @@ -201,7 +202,6 @@ // - _in_young_gc_mode(false), _full_young_gcs(true), _full_young_pause_num(0), _partial_young_pause_num(0), @@ -273,15 +273,26 @@ _recorded_survivor_tail(NULL), _survivors_age_table(true), - _gc_overhead_perc(0.0) - -{ + _gc_overhead_perc(0.0) { + // Set up the region size and associated fields. Given that the // policy is created before the heap, we have to set this up here, // so it's done as soon as possible. HeapRegion::setup_heap_region_size(Arguments::min_heap_size()); HeapRegionRemSet::setup_remset_size(); + G1ErgoVerbose::initialize(); + if (PrintAdaptiveSizePolicy) { + // Currently, we only use a single switch for all the heuristics. + G1ErgoVerbose::set_enabled(true); + // Given that we don't currently have a verboseness level + // parameter, we'll hardcode this to high. This can be easily + // changed in the future. 
+ G1ErgoVerbose::set_level(ErgoHigh); + } else { + G1ErgoVerbose::set_enabled(false); + } + // Verify PLAB sizes const uint region_size = HeapRegion::GrainWords; if (YoungPLABSize > region_size || OldPLABSize > region_size) { @@ -400,21 +411,30 @@ _sigma = (double) G1ConfidencePercent / 100.0; // start conservatively (around 50ms is about right) - _concurrent_mark_init_times_ms->add(0.05); _concurrent_mark_remark_times_ms->add(0.05); _concurrent_mark_cleanup_times_ms->add(0.20); _tenuring_threshold = MaxTenuringThreshold; - - // if G1FixedSurvivorSpaceSize is 0 which means the size is not - // fixed, then _max_survivor_regions will be calculated at - // calculate_young_list_target_length during initialization - _max_survivor_regions = G1FixedSurvivorSpaceSize / HeapRegion::GrainBytes; + // _max_survivor_regions will be calculated by + // update_young_list_target_length() during initialization. + _max_survivor_regions = 0; assert(GCTimeRatio > 0, "we should have set it to a default value set_g1_gc_flags() " "if a user set it to 0"); _gc_overhead_perc = 100.0 * (1.0 / (1.0 + GCTimeRatio)); + uintx reserve_perc = G1ReservePercent; + // Put an artificial ceiling on this so that it's not set to a silly value. + if (reserve_perc > 50) { + reserve_perc = 50; + warning("G1ReservePercent is set to a value that is too large, " + "it's been updated to %u", reserve_perc); + } + _reserve_factor = (double) reserve_perc / 100.0; + // This will be set when the heap is expanded + // for the first time during initialization. + _reserve_regions = 0; + initialize_all(); } @@ -439,16 +459,15 @@ // ParallelScavengeHeap::initialize()). We might change this in the // future, but it's a good start. 
class G1YoungGenSizer : public TwoGenerationCollectorPolicy { - size_t size_to_region_num(size_t byte_size) { - return MAX2((size_t) 1, byte_size / HeapRegion::GrainBytes); - } public: G1YoungGenSizer() { initialize_flags(); initialize_size_info(); } - + size_t size_to_region_num(size_t byte_size) { + return MAX2((size_t) 1, byte_size / HeapRegion::GrainBytes); + } size_t min_young_region_num() { return size_to_region_num(_min_gen0_size); } @@ -460,6 +479,13 @@ } }; +void G1CollectorPolicy::update_young_list_size_using_newratio(size_t number_of_heap_regions) { + assert(number_of_heap_regions > 0, "Heap must be initialized"); + size_t young_size = number_of_heap_regions / (NewRatio + 1); + _min_desired_young_length = young_size; + _max_desired_young_length = young_size; +} + void G1CollectorPolicy::init() { // Set aside an initial future to_space. _g1 = G1CollectedHeap::heap(); @@ -468,27 +494,37 @@ initialize_gc_policy_counters(); - if (G1Gen) { - _in_young_gc_mode = true; - - G1YoungGenSizer sizer; - size_t initial_region_num = sizer.initial_young_region_num(); - - if (UseAdaptiveSizePolicy) { - set_adaptive_young_list_length(true); - _young_list_fixed_length = 0; + G1YoungGenSizer sizer; + size_t initial_region_num = sizer.initial_young_region_num(); + _min_desired_young_length = sizer.min_young_region_num(); + _max_desired_young_length = sizer.max_young_region_num(); + + if (FLAG_IS_CMDLINE(NewRatio)) { + if (FLAG_IS_CMDLINE(NewSize) || FLAG_IS_CMDLINE(MaxNewSize)) { + gclog_or_tty->print_cr("-XX:NewSize and -XX:MaxNewSize overrides -XX:NewRatio"); } else { - set_adaptive_young_list_length(false); - _young_list_fixed_length = initial_region_num; + // Treat NewRatio as a fixed size that is only recalculated when the heap size changes + size_t heap_regions = sizer.size_to_region_num(_g1->n_regions()); + update_young_list_size_using_newratio(heap_regions); + _using_new_ratio_calculations = true; } - _free_regions_at_end_of_collection = _g1->free_regions(); - 
calculate_young_list_min_length(); - guarantee( _young_list_min_length == 0, "invariant, not enough info" ); - calculate_young_list_target_length(); + } + + // GenCollectorPolicy guarantees that min <= initial <= max. + // Asserting here just to state that we rely on this property. + assert(_min_desired_young_length <= _max_desired_young_length, "Invalid min/max young gen size values"); + assert(initial_region_num <= _max_desired_young_length, "Initial young gen size too large"); + assert(_min_desired_young_length <= initial_region_num, "Initial young gen size too small"); + + set_adaptive_young_list_length(_min_desired_young_length < _max_desired_young_length); + if (adaptive_young_list_length()) { + _young_list_fixed_length = 0; } else { - _young_list_fixed_length = 0; - _in_young_gc_mode = false; + _young_list_fixed_length = initial_region_num; } + _free_regions_at_end_of_collection = _g1->free_regions(); + update_young_list_target_length(); + _prev_eden_capacity = _young_list_target_length * HeapRegion::GrainBytes; // We may immediately start allocating regions and placing them on the // collection set list. Initialize the per-collection set info @@ -496,238 +532,261 @@ } // Create the jstat counters for the policy. 
-void G1CollectorPolicy::initialize_gc_policy_counters() -{ - _gc_policy_counters = new GCPolicyCounters("GarbageFirst", 1, 2 + G1Gen); -} - -void G1CollectorPolicy::calculate_young_list_min_length() { - _young_list_min_length = 0; - - if (!adaptive_young_list_length()) - return; - - if (_alloc_rate_ms_seq->num() > 3) { - double now_sec = os::elapsedTime(); - double when_ms = _mmu_tracker->when_max_gc_sec(now_sec) * 1000.0; - double alloc_rate_ms = predict_alloc_rate_ms(); - size_t min_regions = (size_t) ceil(alloc_rate_ms * when_ms); - size_t current_region_num = _g1->young_list()->length(); - _young_list_min_length = min_regions + current_region_num; - } -} - -void G1CollectorPolicy::calculate_young_list_target_length() { - if (adaptive_young_list_length()) { - size_t rs_lengths = (size_t) get_new_prediction(_rs_lengths_seq); - calculate_young_list_target_length(rs_lengths); - } else { - if (full_young_gcs()) - _young_list_target_length = _young_list_fixed_length; - else - _young_list_target_length = _young_list_fixed_length / 2; - } - - // Make sure we allow the application to allocate at least one - // region before we need to do a collection again. 
- size_t min_length = _g1->young_list()->length() + 1; - _young_list_target_length = MAX2(_young_list_target_length, min_length); - calculate_max_gc_locker_expansion(); - calculate_survivors_policy(); +void G1CollectorPolicy::initialize_gc_policy_counters() { + _gc_policy_counters = new GCPolicyCounters("GarbageFirst", 1, 3); } -void G1CollectorPolicy::calculate_young_list_target_length(size_t rs_lengths) { - guarantee( adaptive_young_list_length(), "pre-condition" ); - guarantee( !_in_marking_window || !_last_full_young_gc, "invariant" ); - - double start_time_sec = os::elapsedTime(); - size_t min_reserve_perc = MAX2((size_t)2, (size_t)G1ReservePercent); - min_reserve_perc = MIN2((size_t) 50, min_reserve_perc); - size_t reserve_regions = - (size_t) ((double) min_reserve_perc * (double) _g1->n_regions() / 100.0); - - if (full_young_gcs() && _free_regions_at_end_of_collection > 0) { - // we are in fully-young mode and there are free regions in the heap - - double survivor_regions_evac_time = - predict_survivor_regions_evac_time(); - - double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0; - size_t pending_cards = (size_t) get_new_prediction(_pending_cards_seq); - size_t adj_rs_lengths = rs_lengths + predict_rs_length_diff(); - size_t scanned_cards = predict_young_card_num(adj_rs_lengths); - double base_time_ms = predict_base_elapsed_time_ms(pending_cards, scanned_cards) - + survivor_regions_evac_time; - - // the result - size_t final_young_length = 0; - - size_t init_free_regions = - MAX2((size_t)0, _free_regions_at_end_of_collection - reserve_regions); - - // if we're still under the pause target... - if (base_time_ms <= target_pause_time_ms) { - // We make sure that the shortest young length that makes sense - // fits within the target pause time. 
- size_t min_young_length = 1; - - if (predict_will_fit(min_young_length, base_time_ms, - init_free_regions, target_pause_time_ms)) { - // The shortest young length will fit within the target pause time; - // we'll now check whether the absolute maximum number of young - // regions will fit in the target pause time. If not, we'll do - // a binary search between min_young_length and max_young_length - size_t abs_max_young_length = _free_regions_at_end_of_collection - 1; - size_t max_young_length = abs_max_young_length; - - if (max_young_length > min_young_length) { - // Let's check if the initial max young length will fit within the - // target pause. If so then there is no need to search for a maximal - // young length - we'll return the initial maximum - - if (predict_will_fit(max_young_length, base_time_ms, - init_free_regions, target_pause_time_ms)) { - // The maximum young length will satisfy the target pause time. - // We are done so set min young length to this maximum length. - // The code after the loop will then set final_young_length using - // the value cached in the minimum length. - min_young_length = max_young_length; - } else { - // The maximum possible number of young regions will not fit within - // the target pause time so let's search.... - - size_t diff = (max_young_length - min_young_length) / 2; - max_young_length = min_young_length + diff; - - while (max_young_length > min_young_length) { - if (predict_will_fit(max_young_length, base_time_ms, - init_free_regions, target_pause_time_ms)) { - - // The current max young length will fit within the target - // pause time. Note we do not exit the loop here. 
By setting - // min = max, and then increasing the max below means that - // we will continue searching for an upper bound in the - // range [max..max+diff] - min_young_length = max_young_length; - } - diff = (max_young_length - min_young_length) / 2; - max_young_length = min_young_length + diff; - } - // the above loop found a maximal young length that will fit - // within the target pause time. - } - assert(min_young_length <= abs_max_young_length, "just checking"); - } - final_young_length = min_young_length; - } - } - // and we're done! - - // we should have at least one region in the target young length - _young_list_target_length = - final_young_length + _recorded_survivor_regions; - - // let's keep an eye of how long we spend on this calculation - // right now, I assume that we'll print it when we need it; we - // should really adde it to the breakdown of a pause - double end_time_sec = os::elapsedTime(); - double elapsed_time_ms = (end_time_sec - start_time_sec) * 1000.0; - -#ifdef TRACE_CALC_YOUNG_LENGTH - // leave this in for debugging, just in case - gclog_or_tty->print_cr("target = %1.1lf ms, young = " SIZE_FORMAT ", " - "elapsed %1.2lf ms, (%s%s) " SIZE_FORMAT SIZE_FORMAT, - target_pause_time_ms, - _young_list_target_length - elapsed_time_ms, - full_young_gcs() ? "full" : "partial", - during_initial_mark_pause() ? " i-m" : "", - _in_marking_window, - _in_marking_window_im); -#endif // TRACE_CALC_YOUNG_LENGTH - - if (_young_list_target_length < _young_list_min_length) { - // bummer; this means that, if we do a pause when the maximal - // length dictates, we'll violate the pause spacing target (the - // min length was calculate based on the application's current - // alloc rate); - - // so, we have to bite the bullet, and allocate the minimum - // number. We'll violate our target, but we just can't meet it. 
- -#ifdef TRACE_CALC_YOUNG_LENGTH - // leave this in for debugging, just in case - gclog_or_tty->print_cr("adjusted target length from " - SIZE_FORMAT " to " SIZE_FORMAT, - _young_list_target_length, _young_list_min_length); -#endif // TRACE_CALC_YOUNG_LENGTH - - _young_list_target_length = _young_list_min_length; - } - } else { - // we are in a partially-young mode or we've run out of regions (due - // to evacuation failure) - -#ifdef TRACE_CALC_YOUNG_LENGTH - // leave this in for debugging, just in case - gclog_or_tty->print_cr("(partial) setting target to " SIZE_FORMAT - _young_list_min_length); -#endif // TRACE_CALC_YOUNG_LENGTH - // we'll do the pause as soon as possible by choosing the minimum - _young_list_target_length = _young_list_min_length; - } - - _rs_lengths_prediction = rs_lengths; -} - -// This is used by: calculate_young_list_target_length(rs_length). It -// returns true iff: -// the predicted pause time for the given young list will not overflow -// the target pause time -// and: -// the predicted amount of surviving data will not overflow the -// the amount of free space available for survivor regions. 
-// -bool -G1CollectorPolicy::predict_will_fit(size_t young_length, - double base_time_ms, - size_t init_free_regions, - double target_pause_time_ms) { - - if (young_length >= init_free_regions) +bool G1CollectorPolicy::predict_will_fit(size_t young_length, + double base_time_ms, + size_t base_free_regions, + double target_pause_time_ms) { + if (young_length >= base_free_regions) { // end condition 1: not enough space for the young regions return false; - - double accum_surv_rate_adj = 0.0; - double accum_surv_rate = - accum_yg_surv_rate_pred((int)(young_length - 1)) - accum_surv_rate_adj; - + } + + double accum_surv_rate = accum_yg_surv_rate_pred((int)(young_length - 1)); size_t bytes_to_copy = - (size_t) (accum_surv_rate * (double) HeapRegion::GrainBytes); - + (size_t) (accum_surv_rate * (double) HeapRegion::GrainBytes); double copy_time_ms = predict_object_copy_time_ms(bytes_to_copy); - - double young_other_time_ms = - predict_young_other_time_ms(young_length); - - double pause_time_ms = - base_time_ms + copy_time_ms + young_other_time_ms; - - if (pause_time_ms > target_pause_time_ms) - // end condition 2: over the target pause time + double young_other_time_ms = predict_young_other_time_ms(young_length); + double pause_time_ms = base_time_ms + copy_time_ms + young_other_time_ms; + if (pause_time_ms > target_pause_time_ms) { + // end condition 2: prediction is over the target pause time return false; + } size_t free_bytes = - (init_free_regions - young_length) * HeapRegion::GrainBytes; - - if ((2.0 + sigma()) * (double) bytes_to_copy > (double) free_bytes) - // end condition 3: out of to-space (conservatively) + (base_free_regions - young_length) * HeapRegion::GrainBytes; + if ((2.0 * sigma()) * (double) bytes_to_copy > (double) free_bytes) { + // end condition 3: out-of-space (conservatively!) return false; + } // success! 
return true; } +void G1CollectorPolicy::record_new_heap_size(size_t new_number_of_regions) { + // re-calculate the necessary reserve + double reserve_regions_d = (double) new_number_of_regions * _reserve_factor; + // We use ceiling so that if reserve_regions_d is > 0.0 (but + // smaller than 1.0) we'll get 1. + _reserve_regions = (size_t) ceil(reserve_regions_d); + + if (_using_new_ratio_calculations) { + // -XX:NewRatio was specified so we need to update the + // young gen length when the heap size has changed. + update_young_list_size_using_newratio(new_number_of_regions); + } +} + +size_t G1CollectorPolicy::calculate_young_list_desired_min_length( + size_t base_min_length) { + size_t desired_min_length = 0; + if (adaptive_young_list_length()) { + if (_alloc_rate_ms_seq->num() > 3) { + double now_sec = os::elapsedTime(); + double when_ms = _mmu_tracker->when_max_gc_sec(now_sec) * 1000.0; + double alloc_rate_ms = predict_alloc_rate_ms(); + desired_min_length = (size_t) ceil(alloc_rate_ms * when_ms); + } else { + // otherwise we don't have enough info to make the prediction + } + } + desired_min_length += base_min_length; + // make sure we don't go below any user-defined minimum bound + return MAX2(_min_desired_young_length, desired_min_length); +} + +size_t G1CollectorPolicy::calculate_young_list_desired_max_length() { + // Here, we might want to also take into account any additional + // constraints (i.e., user-defined minimum bound). Currently, we + // effectively don't set this bound. + return _max_desired_young_length; +} + +void G1CollectorPolicy::update_young_list_target_length(size_t rs_lengths) { + if (rs_lengths == (size_t) -1) { + // if it's set to the default value (-1), we should predict it; + // otherwise, use the given value. + rs_lengths = (size_t) get_new_prediction(_rs_lengths_seq); + } + + // Calculate the absolute and desired min bounds. + + // This is how many young regions we already have (currently: the survivors). 
+ size_t base_min_length = recorded_survivor_regions(); + // This is the absolute minimum young length, which ensures that we + // can allocate one eden region in the worst-case. + size_t absolute_min_length = base_min_length + 1; + size_t desired_min_length = + calculate_young_list_desired_min_length(base_min_length); + if (desired_min_length < absolute_min_length) { + desired_min_length = absolute_min_length; + } + + // Calculate the absolute and desired max bounds. + + // We will try our best not to "eat" into the reserve. + size_t absolute_max_length = 0; + if (_free_regions_at_end_of_collection > _reserve_regions) { + absolute_max_length = _free_regions_at_end_of_collection - _reserve_regions; + } + size_t desired_max_length = calculate_young_list_desired_max_length(); + if (desired_max_length > absolute_max_length) { + desired_max_length = absolute_max_length; + } + + size_t young_list_target_length = 0; + if (adaptive_young_list_length()) { + if (full_young_gcs()) { + young_list_target_length = + calculate_young_list_target_length(rs_lengths, + base_min_length, + desired_min_length, + desired_max_length); + _rs_lengths_prediction = rs_lengths; + } else { + // Don't calculate anything and let the code below bound it to + // the desired_min_length, i.e., do the next GC as soon as + // possible to maximize how many old regions we can add to it. + } + } else { + if (full_young_gcs()) { + young_list_target_length = _young_list_fixed_length; + } else { + // A bit arbitrary: during partially-young GCs we allocate half + // the young regions to try to add old regions to the CSet. + young_list_target_length = _young_list_fixed_length / 2; + // We choose to accept that we might go under the desired min + // length given that we intentionally ask for a smaller young gen. + desired_min_length = absolute_min_length; + } + } + + // Make sure we don't go over the desired max length, nor under the + // desired min length. 
In case they clash, desired_min_length wins + // which is why that test is second. + if (young_list_target_length > desired_max_length) { + young_list_target_length = desired_max_length; + } + if (young_list_target_length < desired_min_length) { + young_list_target_length = desired_min_length; + } + + assert(young_list_target_length > recorded_survivor_regions(), + "we should be able to allocate at least one eden region"); + assert(young_list_target_length >= absolute_min_length, "post-condition"); + _young_list_target_length = young_list_target_length; + + update_max_gc_locker_expansion(); +} + +size_t +G1CollectorPolicy::calculate_young_list_target_length(size_t rs_lengths, + size_t base_min_length, + size_t desired_min_length, + size_t desired_max_length) { + assert(adaptive_young_list_length(), "pre-condition"); + assert(full_young_gcs(), "only call this for fully-young GCs"); + + // In case some edge-condition makes the desired max length too small... + if (desired_max_length <= desired_min_length) { + return desired_min_length; + } + + // We'll adjust min_young_length and max_young_length not to include + // the already allocated young regions (i.e., so they reflect the + // min and max eden regions we'll allocate). The base_min_length + // will be reflected in the predictions by the + // survivor_regions_evac_time prediction. 
+ assert(desired_min_length > base_min_length, "invariant"); + size_t min_young_length = desired_min_length - base_min_length; + assert(desired_max_length > base_min_length, "invariant"); + size_t max_young_length = desired_max_length - base_min_length; + + double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0; + double survivor_regions_evac_time = predict_survivor_regions_evac_time(); + size_t pending_cards = (size_t) get_new_prediction(_pending_cards_seq); + size_t adj_rs_lengths = rs_lengths + predict_rs_length_diff(); + size_t scanned_cards = predict_young_card_num(adj_rs_lengths); + double base_time_ms = + predict_base_elapsed_time_ms(pending_cards, scanned_cards) + + survivor_regions_evac_time; + size_t available_free_regions = _free_regions_at_end_of_collection; + size_t base_free_regions = 0; + if (available_free_regions > _reserve_regions) { + base_free_regions = available_free_regions - _reserve_regions; + } + + // Here, we will make sure that the shortest young length that + // makes sense fits within the target pause time. + + if (predict_will_fit(min_young_length, base_time_ms, + base_free_regions, target_pause_time_ms)) { + // The shortest young length will fit into the target pause time; + // we'll now check whether the absolute maximum number of young + // regions will fit in the target pause time. If not, we'll do + // a binary search between min_young_length and max_young_length. + if (predict_will_fit(max_young_length, base_time_ms, + base_free_regions, target_pause_time_ms)) { + // The maximum young length will fit into the target pause time. + // We are done so set min young length to the maximum length (as + // the result is assumed to be returned in min_young_length). + min_young_length = max_young_length; + } else { + // The maximum possible number of young regions will not fit within + // the target pause time so we'll search for the optimal + // length. 
The loop invariants are: + // + // min_young_length < max_young_length + // min_young_length is known to fit into the target pause time + // max_young_length is known not to fit into the target pause time + // + // Going into the loop we know the above hold as we've just + // checked them. Every time around the loop we check whether + // the middle value between min_young_length and + // max_young_length fits into the target pause time. If it + // does, it becomes the new min. If it doesn't, it becomes + // the new max. This way we maintain the loop invariants. + + assert(min_young_length < max_young_length, "invariant"); + size_t diff = (max_young_length - min_young_length) / 2; + while (diff > 0) { + size_t young_length = min_young_length + diff; + if (predict_will_fit(young_length, base_time_ms, + base_free_regions, target_pause_time_ms)) { + min_young_length = young_length; + } else { + max_young_length = young_length; + } + assert(min_young_length < max_young_length, "invariant"); + diff = (max_young_length - min_young_length) / 2; + } + // The result is min_young_length which, according to the + // loop invariants, should fit within the target pause time. + + // These are the post-conditions of the binary search above: + assert(min_young_length < max_young_length, + "otherwise we should have discovered that max_young_length " + "fits into the pause target and not done the binary search"); + assert(predict_will_fit(min_young_length, base_time_ms, + base_free_regions, target_pause_time_ms), + "min_young_length, the result of the binary search, should " + "fit into the pause target"); + assert(!predict_will_fit(min_young_length + 1, base_time_ms, + base_free_regions, target_pause_time_ms), + "min_young_length, the result of the binary search, should be " + "optimal, so no larger length should fit into the pause target"); + } + } else { + // Even the minimum length doesn't fit into the pause time + // target, return it as the result nevertheless.
+ } + return base_min_length + min_young_length; +} + double G1CollectorPolicy::predict_survivor_regions_evac_time() { double survivor_regions_evac_time = 0.0; for (HeapRegion * r = _recorded_survivor_head; @@ -738,17 +797,19 @@ return survivor_regions_evac_time; } -void G1CollectorPolicy::check_prediction_validity() { +void G1CollectorPolicy::revise_young_list_target_length_if_necessary() { guarantee( adaptive_young_list_length(), "should not call this otherwise" ); size_t rs_lengths = _g1->young_list()->sampled_rs_lengths(); if (rs_lengths > _rs_lengths_prediction) { // add 10% to avoid having to recalculate often size_t rs_lengths_prediction = rs_lengths * 1100 / 1000; - calculate_young_list_target_length(rs_lengths_prediction); + update_young_list_target_length(rs_lengths_prediction); } } + + HeapWord* G1CollectorPolicy::mem_allocate_work(size_t size, bool is_tlab, bool* gc_overhead_limit_was_exceeded) { @@ -855,8 +916,7 @@ _free_regions_at_end_of_collection = _g1->free_regions(); // Reset survivors SurvRateGroup. _survivor_surv_rate_group->reset(); - calculate_young_list_min_length(); - calculate_young_list_target_length(); + update_young_list_target_length(); } void G1CollectorPolicy::record_stop_world_start() { @@ -868,10 +928,14 @@ if (PrintGCDetails) { gclog_or_tty->stamp(PrintGCTimeStamps); gclog_or_tty->print("[GC pause"); - if (in_young_gc_mode()) - gclog_or_tty->print(" (%s)", full_young_gcs() ? "young" : "partial"); + gclog_or_tty->print(" (%s)", full_young_gcs() ? "young" : "partial"); } + // We only need to do this here as the policy will only be applied + // to the GC we're about to start, so there is no point in calculating this + // every time we calculate / recalculate the target young length.
+ update_survivors_policy(); + assert(_g1->used() == _g1->recalculate_used(), err_msg("sanity, used: "SIZE_FORMAT" recalculate_used: "SIZE_FORMAT, _g1->used(), _g1->recalculate_used())); @@ -921,8 +985,7 @@ _satb_drain_time_set = false; _last_satb_drain_processed_buffers = -1; - if (in_young_gc_mode()) - _last_young_gc_full = false; + _last_young_gc_full = false; // do that for any other surv rate groups _short_lived_surv_rate_group->stop_adding_regions(); @@ -935,12 +998,7 @@ _mark_closure_time_ms = mark_closure_time_ms; } -void G1CollectorPolicy::record_concurrent_mark_init_start() { - _mark_init_start_sec = os::elapsedTime(); - guarantee(!in_young_gc_mode(), "should not do be here in young GC mode"); -} - -void G1CollectorPolicy::record_concurrent_mark_init_end_pre(double +void G1CollectorPolicy::record_concurrent_mark_init_end(double mark_init_elapsed_time_ms) { _during_marking = true; assert(!initiate_conc_mark_if_possible(), "we should have cleared it by now"); @@ -948,15 +1006,6 @@ _cur_mark_stop_world_time_ms = mark_init_elapsed_time_ms; } -void G1CollectorPolicy::record_concurrent_mark_init_end() { - double end_time_sec = os::elapsedTime(); - double elapsed_time_ms = (end_time_sec - _mark_init_start_sec) * 1000.0; - _concurrent_mark_init_times_ms->add(elapsed_time_ms); - record_concurrent_mark_init_end_pre(elapsed_time_ms); - - _mmu_tracker->add_pause(_mark_init_start_sec, end_time_sec, true); -} - void G1CollectorPolicy::record_concurrent_mark_remark_start() { _mark_remark_start_sec = os::elapsedTime(); _during_marking = false; @@ -987,11 +1036,9 @@ G1CollectorPolicy:: record_concurrent_mark_cleanup_end_work1(size_t freed_bytes, size_t max_live_bytes) { - if (_n_marks < 2) _n_marks++; - if (G1PolicyVerbose > 0) - gclog_or_tty->print_cr("At end of marking, max_live is " SIZE_FORMAT " MB " - " (of " SIZE_FORMAT " MB heap).", - max_live_bytes/M, _g1->capacity()/M); + if (_n_marks < 2) { + _n_marks++; + } } // The important thing about this is that it 
includes "os::elapsedTime". @@ -1005,27 +1052,15 @@ _mmu_tracker->add_pause(_mark_cleanup_start_sec, end_time_sec, true); _num_markings++; - - // We did a marking, so reset the "since_last_mark" variables. - double considerConcMarkCost = 1.0; - // If there are available processors, concurrent activity is free... - if (Threads::number_of_non_daemon_threads() * 2 < - os::active_processor_count()) { - considerConcMarkCost = 0.0; - } _n_pauses_at_mark_end = _n_pauses; _n_marks_since_last_pause++; } void G1CollectorPolicy::record_concurrent_mark_cleanup_completed() { - if (in_young_gc_mode()) { - _should_revert_to_full_young_gcs = false; - _last_full_young_gc = true; - _in_marking_window = false; - if (adaptive_young_list_length()) - calculate_young_list_target_length(); - } + _should_revert_to_full_young_gcs = false; + _last_full_young_gc = true; + _in_marking_window = false; } void G1CollectorPolicy::record_concurrent_pause() { @@ -1174,31 +1209,46 @@ } #endif // PRODUCT - if (in_young_gc_mode()) { - last_pause_included_initial_mark = during_initial_mark_pause(); - if (last_pause_included_initial_mark) - record_concurrent_mark_init_end_pre(0.0); - - size_t min_used_targ = - (_g1->capacity() / 100) * InitiatingHeapOccupancyPercent; - - - if (!_g1->mark_in_progress() && !_last_full_young_gc) { - assert(!last_pause_included_initial_mark, "invariant"); - if (cur_used_bytes > min_used_targ && - cur_used_bytes > _prev_collection_pause_used_at_end_bytes) { + last_pause_included_initial_mark = during_initial_mark_pause(); + if (last_pause_included_initial_mark) + record_concurrent_mark_init_end(0.0); + + size_t marking_initiating_used_threshold = + (_g1->capacity() / 100) * InitiatingHeapOccupancyPercent; + + if (!_g1->mark_in_progress() && !_last_full_young_gc) { + assert(!last_pause_included_initial_mark, "invariant"); + if (cur_used_bytes > marking_initiating_used_threshold) { + if (cur_used_bytes > _prev_collection_pause_used_at_end_bytes) { 
assert(!during_initial_mark_pause(), "we should not see this here"); + ergo_verbose3(ErgoConcCycles, + "request concurrent cycle initiation", + ergo_format_reason("occupancy higher than threshold") + ergo_format_byte("occupancy") + ergo_format_byte_perc("threshold"), + cur_used_bytes, + marking_initiating_used_threshold, + (double) InitiatingHeapOccupancyPercent); + // Note: this might have already been set, if during the last // pause we decided to start a cycle but at the beginning of // this pause we decided to postpone it. That's OK. set_initiate_conc_mark_if_possible(); + } else { + ergo_verbose2(ErgoConcCycles, + "do not request concurrent cycle initiation", + ergo_format_reason("occupancy lower than previous occupancy") + ergo_format_byte("occupancy") + ergo_format_byte("previous occupancy"), + cur_used_bytes, + _prev_collection_pause_used_at_end_bytes); } } - - _prev_collection_pause_used_at_end_bytes = cur_used_bytes; } + _prev_collection_pause_used_at_end_bytes = cur_used_bytes; + _mmu_tracker->add_pause(end_time_sec - elapsed_ms/1000.0, end_time_sec, false); @@ -1468,24 +1518,52 @@ new_in_marking_window_im = true; } - if (in_young_gc_mode()) { - if (_last_full_young_gc) { - set_full_young_gcs(false); - _last_full_young_gc = false; + if (_last_full_young_gc) { + ergo_verbose2(ErgoPartiallyYoungGCs, + "start partially-young GCs", + ergo_format_byte_perc("known garbage"), + _known_garbage_bytes, _known_garbage_ratio * 100.0); + set_full_young_gcs(false); + _last_full_young_gc = false; + } + + if ( !_last_young_gc_full ) { + if (_should_revert_to_full_young_gcs) { + ergo_verbose2(ErgoPartiallyYoungGCs, + "end partially-young GCs", + ergo_format_reason("partially-young GCs end requested") + ergo_format_byte_perc("known garbage"), + _known_garbage_bytes, _known_garbage_ratio * 100.0); + set_full_young_gcs(true); + } else if (_known_garbage_ratio < 0.05) { + ergo_verbose3(ErgoPartiallyYoungGCs, + "end partially-young GCs", + ergo_format_reason("known garbage 
percent lower than threshold") + ergo_format_byte_perc("known garbage") + ergo_format_perc("threshold"), + _known_garbage_bytes, _known_garbage_ratio * 100.0, + 0.05 * 100.0); + set_full_young_gcs(true); + } else if (adaptive_young_list_length() && + (get_gc_eff_factor() * cur_efficiency < predict_young_gc_eff())) { + ergo_verbose5(ErgoPartiallyYoungGCs, + "end partially-young GCs", + ergo_format_reason("current GC efficiency lower than " + "predicted fully-young GC efficiency") + ergo_format_double("GC efficiency factor") + ergo_format_double("current GC efficiency") + ergo_format_double("predicted fully-young GC efficiency") + ergo_format_byte_perc("known garbage"), + get_gc_eff_factor(), cur_efficiency, + predict_young_gc_eff(), + _known_garbage_bytes, _known_garbage_ratio * 100.0); + set_full_young_gcs(true); } - - if ( !_last_young_gc_full ) { - if ( _should_revert_to_full_young_gcs || - _known_garbage_ratio < 0.05 || - (adaptive_young_list_length() && - (get_gc_eff_factor() * cur_efficiency < predict_young_gc_eff())) ) { - set_full_young_gcs(true); - } - } - _should_revert_to_full_young_gcs = false; - - if (_last_young_gc_full && !_during_marking) - _young_gc_eff_seq->add(cur_efficiency); + } + _should_revert_to_full_young_gcs = false; + + if (_last_young_gc_full && !_during_marking) { + _young_gc_eff_seq->add(cur_efficiency); } _short_lived_surv_rate_group->start_adding_regions(); @@ -1633,8 +1711,7 @@ _in_marking_window = new_in_marking_window; _in_marking_window_im = new_in_marking_window_im; _free_regions_at_end_of_collection = _g1->free_regions(); - calculate_young_list_min_length(); - calculate_young_list_target_length(); + update_young_list_target_length(); // Note that _mmu_tracker->max_gc_time() returns the time in seconds. 
double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0; @@ -1655,20 +1732,26 @@ size_t used_before_gc = _cur_collection_pause_used_at_start_bytes; size_t used = _g1->used(); size_t capacity = _g1->capacity(); + size_t eden_capacity = + (_young_list_target_length * HeapRegion::GrainBytes) - survivor_bytes; gclog_or_tty->print_cr( - " [Eden: "EXT_SIZE_FORMAT"->"EXT_SIZE_FORMAT" " - "Survivors: "EXT_SIZE_FORMAT"->"EXT_SIZE_FORMAT" " - "Heap: "EXT_SIZE_FORMAT"("EXT_SIZE_FORMAT")->" - EXT_SIZE_FORMAT"("EXT_SIZE_FORMAT")]", - EXT_SIZE_PARAMS(_eden_bytes_before_gc), - EXT_SIZE_PARAMS(eden_bytes), - EXT_SIZE_PARAMS(_survivor_bytes_before_gc), - EXT_SIZE_PARAMS(survivor_bytes), - EXT_SIZE_PARAMS(used_before_gc), - EXT_SIZE_PARAMS(_capacity_before_gc), - EXT_SIZE_PARAMS(used), - EXT_SIZE_PARAMS(capacity)); + " [Eden: "EXT_SIZE_FORMAT"("EXT_SIZE_FORMAT")->"EXT_SIZE_FORMAT"("EXT_SIZE_FORMAT") " + "Survivors: "EXT_SIZE_FORMAT"->"EXT_SIZE_FORMAT" " + "Heap: "EXT_SIZE_FORMAT"("EXT_SIZE_FORMAT")->" + EXT_SIZE_FORMAT"("EXT_SIZE_FORMAT")]", + EXT_SIZE_PARAMS(_eden_bytes_before_gc), + EXT_SIZE_PARAMS(_prev_eden_capacity), + EXT_SIZE_PARAMS(eden_bytes), + EXT_SIZE_PARAMS(eden_capacity), + EXT_SIZE_PARAMS(_survivor_bytes_before_gc), + EXT_SIZE_PARAMS(survivor_bytes), + EXT_SIZE_PARAMS(used_before_gc), + EXT_SIZE_PARAMS(_capacity_before_gc), + EXT_SIZE_PARAMS(used), + EXT_SIZE_PARAMS(capacity)); + + _prev_eden_capacity = eden_capacity; } else if (PrintGC) { _g1->print_size_transition(gclog_or_tty, _cur_collection_pause_used_at_start_bytes, @@ -1910,18 +1993,14 @@ // I don't think we need to do this when in young GC mode since // marking will be initiated next time we hit the soft limit anyway... if (predicted_time_ms > _expensive_region_limit_ms) { - if (!in_young_gc_mode()) { - set_full_young_gcs(true); - // We might want to do something different here. 
However, - // right now we don't support the non-generational G1 mode - // (and in fact we are planning to remove the associated code, - // see CR 6814390). So, let's leave it as is and this will be - // removed some time in the future - ShouldNotReachHere(); - set_during_initial_mark_pause(); - } else - // no point in doing another partial one - _should_revert_to_full_young_gcs = true; + ergo_verbose2(ErgoPartiallyYoungGCs, + "request partially-young GCs end", + ergo_format_reason("predicted region time higher than threshold") + ergo_format_ms("predicted region time") + ergo_format_ms("threshold"), + predicted_time_ms, _expensive_region_limit_ms); + // no point in doing another partial one + _should_revert_to_full_young_gcs = true; } } @@ -2029,7 +2108,9 @@ } size_t G1CollectorPolicy::expansion_amount() { - if ((recent_avg_pause_time_ratio() * 100.0) > _gc_overhead_perc) { + double recent_gc_overhead = recent_avg_pause_time_ratio() * 100.0; + double threshold = _gc_overhead_perc; + if (recent_gc_overhead > threshold) { // We will double the existing space, or take // G1ExpandByPercentOfAvailable % of the available expansion // space, whichever is smaller, bounded below by a minimum @@ -2044,20 +2125,19 @@ expand_bytes = MIN2(expand_bytes_via_pct, committed_bytes); expand_bytes = MAX2(expand_bytes, min_expand_bytes); expand_bytes = MIN2(expand_bytes, uncommitted_bytes); - if (G1PolicyVerbose > 1) { - gclog_or_tty->print("Decided to expand: ratio = %5.2f, " - "committed = %d%s, uncommited = %d%s, via pct = %d%s.\n" - " Answer = %d.\n", - recent_avg_pause_time_ratio(), - byte_size_in_proper_unit(committed_bytes), - proper_unit_for_byte_size(committed_bytes), - byte_size_in_proper_unit(uncommitted_bytes), - proper_unit_for_byte_size(uncommitted_bytes), - byte_size_in_proper_unit(expand_bytes_via_pct), - proper_unit_for_byte_size(expand_bytes_via_pct), - byte_size_in_proper_unit(expand_bytes), - proper_unit_for_byte_size(expand_bytes)); - } + + 
ergo_verbose5(ErgoHeapSizing, + "attempt heap expansion", + ergo_format_reason("recent GC overhead higher than " + "threshold after GC") + ergo_format_perc("recent GC overhead") + ergo_format_perc("threshold") + ergo_format_byte("uncommitted") + ergo_format_byte_perc("calculated expansion amount"), + recent_gc_overhead, threshold, + uncommitted_bytes, + expand_bytes_via_pct, (double) G1ExpandByPercentOfAvailable); + return expand_bytes; } else { return 0; @@ -2280,8 +2360,7 @@ #endif // PRODUCT } -void -G1CollectorPolicy::update_region_num(bool young) { +void G1CollectorPolicy::update_region_num(bool young) { if (young) { ++_region_num_young; } else { @@ -2313,7 +2392,7 @@ }; } -void G1CollectorPolicy::calculate_max_gc_locker_expansion() { +void G1CollectorPolicy::update_max_gc_locker_expansion() { size_t expansion_region_num = 0; if (GCLockerEdenExpansionPercent > 0) { double perc = (double) GCLockerEdenExpansionPercent / 100.0; @@ -2329,20 +2408,15 @@ } // Calculates survivor space parameters. -void G1CollectorPolicy::calculate_survivors_policy() -{ - if (G1FixedSurvivorSpaceSize == 0) { - _max_survivor_regions = _young_list_target_length / SurvivorRatio; - } else { - _max_survivor_regions = G1FixedSurvivorSpaceSize / HeapRegion::GrainBytes; - } - - if (G1FixedTenuringThreshold) { - _tenuring_threshold = MaxTenuringThreshold; - } else { - _tenuring_threshold = _survivors_age_table.compute_tenuring_threshold( +void G1CollectorPolicy::update_survivors_policy() { + double max_survivor_regions_d = + (double) _young_list_target_length / (double) SurvivorRatio; + // We use ceiling so that if max_survivor_regions_d is > 0.0 (but + // smaller than 1.0) we'll get 1. 
+ _max_survivor_regions = (size_t) ceil(max_survivor_regions_d); + + _tenuring_threshold = _survivors_age_table.compute_tenuring_threshold( HeapRegion::GrainWords * _max_survivor_regions); - } } #ifndef PRODUCT @@ -2367,13 +2441,23 @@ } #endif -bool -G1CollectorPolicy::force_initial_mark_if_outside_cycle() { +bool G1CollectorPolicy::force_initial_mark_if_outside_cycle( + GCCause::Cause gc_cause) { bool during_cycle = _g1->concurrent_mark()->cmThread()->during_cycle(); if (!during_cycle) { + ergo_verbose1(ErgoConcCycles, + "request concurrent cycle initiation", + ergo_format_reason("requested by GC cause") + ergo_format_str("GC cause"), + GCCause::to_string(gc_cause)); set_initiate_conc_mark_if_possible(); return true; } else { + ergo_verbose1(ErgoConcCycles, + "do not request concurrent cycle initiation", + ergo_format_reason("concurrent cycle already in progress") + ergo_format_str("GC cause"), + GCCause::to_string(gc_cause)); return false; } } @@ -2405,6 +2489,10 @@ // And we can now clear initiate_conc_mark_if_possible() as // we've already acted on it. clear_initiate_conc_mark_if_possible(); + + ergo_verbose0(ErgoConcCycles, + "initiate concurrent cycle", + ergo_format_reason("concurrent cycle initiation requested")); } else { // The concurrent marking thread is still finishing up the // previous cycle. If we start one right now the two cycles @@ -2418,6 +2506,9 @@ // and, if it's in a yield point, it's waiting for us to // finish. So, at this point we will not start a cycle and we'll // let the concurrent marking thread complete the last one. 
+ ergo_verbose0(ErgoConcCycles, + "do not initiate concurrent cycle", + ergo_format_reason("concurrent cycle already in progress")); } } } @@ -2617,9 +2708,7 @@ _inc_cset_size = 0; _inc_cset_bytes_used_before = 0; - if (in_young_gc_mode()) { - _inc_cset_young_index = 0; - } + _inc_cset_young_index = 0; _inc_cset_max_finger = 0; _inc_cset_recorded_young_bytes = 0; @@ -2810,6 +2899,8 @@ // Set this here - in case we're not doing young collections. double non_young_start_time_sec = os::elapsedTime(); + YoungList* young_list = _g1->young_list(); + start_recording_regions(); guarantee(target_pause_time_ms > 0.0, @@ -2822,116 +2913,106 @@ double time_remaining_ms = target_pause_time_ms - base_time_ms; + ergo_verbose3(ErgoCSetConstruction | ErgoHigh, + "start choosing CSet", + ergo_format_ms("predicted base time") + ergo_format_ms("remaining time") + ergo_format_ms("target pause time"), + base_time_ms, time_remaining_ms, target_pause_time_ms); + // the 10% and 50% values are arbitrary... - if (time_remaining_ms < 0.10 * target_pause_time_ms) { + double threshold = 0.10 * target_pause_time_ms; + if (time_remaining_ms < threshold) { + double prev_time_remaining_ms = time_remaining_ms; time_remaining_ms = 0.50 * target_pause_time_ms; _within_target = false; + ergo_verbose3(ErgoCSetConstruction, + "adjust remaining time", + ergo_format_reason("remaining time lower than threshold") + ergo_format_ms("remaining time") + ergo_format_ms("threshold") + ergo_format_ms("adjusted remaining time"), + prev_time_remaining_ms, threshold, time_remaining_ms); } else { _within_target = true; } - // We figure out the number of bytes available for future to-space. - // For new regions without marking information, we must assume the - // worst-case of complete survival. If we have marking information for a - // region, we can bound the amount of live data. We can add a number of - // such regions, as long as the sum of the live data bounds does not - // exceed the available evacuation space. 
- size_t max_live_bytes = _g1->free_regions() * HeapRegion::GrainBytes; - - size_t expansion_bytes = - _g1->expansion_regions() * HeapRegion::GrainBytes; + size_t expansion_bytes = _g1->expansion_regions() * HeapRegion::GrainBytes; + + HeapRegion* hr; + double young_start_time_sec = os::elapsedTime(); _collection_set_bytes_used_before = 0; _collection_set_size = 0; - - // Adjust for expansion and slop. - max_live_bytes = max_live_bytes + expansion_bytes; - - HeapRegion* hr; - if (in_young_gc_mode()) { - double young_start_time_sec = os::elapsedTime(); - - if (G1PolicyVerbose > 0) { - gclog_or_tty->print_cr("Adding %d young regions to the CSet", - _g1->young_list()->length()); - } - - _young_cset_length = 0; - _last_young_gc_full = full_young_gcs() ? true : false; - - if (_last_young_gc_full) - ++_full_young_pause_num; - else - ++_partial_young_pause_num; - - // The young list is laid with the survivor regions from the previous - // pause are appended to the RHS of the young list, i.e. - // [Newly Young Regions ++ Survivors from last pause]. - - hr = _g1->young_list()->first_survivor_region(); - while (hr != NULL) { - assert(hr->is_survivor(), "badly formed young list"); - hr->set_young(); - hr = hr->get_next_young_region(); - } - - // Clear the fields that point to the survivor list - they are - // all young now. 
- _g1->young_list()->clear_survivors(); - - if (_g1->mark_in_progress()) - _g1->concurrent_mark()->register_collection_set_finger(_inc_cset_max_finger); - - _young_cset_length = _inc_cset_young_index; - _collection_set = _inc_cset_head; - _collection_set_size = _inc_cset_size; - _collection_set_bytes_used_before = _inc_cset_bytes_used_before; - - // For young regions in the collection set, we assume the worst - // case of complete survival - max_live_bytes -= _inc_cset_size * HeapRegion::GrainBytes; - - time_remaining_ms -= _inc_cset_predicted_elapsed_time_ms; - predicted_pause_time_ms += _inc_cset_predicted_elapsed_time_ms; - - // The number of recorded young regions is the incremental - // collection set's current size - set_recorded_young_regions(_inc_cset_size); - set_recorded_rs_lengths(_inc_cset_recorded_rs_lengths); - set_recorded_young_bytes(_inc_cset_recorded_young_bytes); + _young_cset_length = 0; + _last_young_gc_full = full_young_gcs() ? true : false; + + if (_last_young_gc_full) { + ++_full_young_pause_num; + } else { + ++_partial_young_pause_num; + } + + // The young list is laid with the survivor regions from the previous + // pause are appended to the RHS of the young list, i.e. + // [Newly Young Regions ++ Survivors from last pause]. + + size_t survivor_region_num = young_list->survivor_length(); + size_t eden_region_num = young_list->length() - survivor_region_num; + size_t old_region_num = 0; + hr = young_list->first_survivor_region(); + while (hr != NULL) { + assert(hr->is_survivor(), "badly formed young list"); + hr->set_young(); + hr = hr->get_next_young_region(); + } + + // Clear the fields that point to the survivor list - they are all young now. 
+ young_list->clear_survivors(); + + if (_g1->mark_in_progress()) + _g1->concurrent_mark()->register_collection_set_finger(_inc_cset_max_finger); + + _young_cset_length = _inc_cset_young_index; + _collection_set = _inc_cset_head; + _collection_set_size = _inc_cset_size; + _collection_set_bytes_used_before = _inc_cset_bytes_used_before; + time_remaining_ms -= _inc_cset_predicted_elapsed_time_ms; + predicted_pause_time_ms += _inc_cset_predicted_elapsed_time_ms; + + ergo_verbose3(ErgoCSetConstruction | ErgoHigh, + "add young regions to CSet", + ergo_format_region("eden") + ergo_format_region("survivors") + ergo_format_ms("predicted young region time"), + eden_region_num, survivor_region_num, + _inc_cset_predicted_elapsed_time_ms); + + // The number of recorded young regions is the incremental + // collection set's current size + set_recorded_young_regions(_inc_cset_size); + set_recorded_rs_lengths(_inc_cset_recorded_rs_lengths); + set_recorded_young_bytes(_inc_cset_recorded_young_bytes); #if PREDICTIONS_VERBOSE - set_predicted_bytes_to_copy(_inc_cset_predicted_bytes_to_copy); + set_predicted_bytes_to_copy(_inc_cset_predicted_bytes_to_copy); #endif // PREDICTIONS_VERBOSE - if (G1PolicyVerbose > 0) { - gclog_or_tty->print_cr(" Added " PTR_FORMAT " Young Regions to CS.", - _inc_cset_size); - gclog_or_tty->print_cr(" (" SIZE_FORMAT " KB left in heap.)", - max_live_bytes/K); - } - - assert(_inc_cset_size == _g1->young_list()->length(), "Invariant"); - - double young_end_time_sec = os::elapsedTime(); - _recorded_young_cset_choice_time_ms = - (young_end_time_sec - young_start_time_sec) * 1000.0; - - // We are doing young collections so reset this. - non_young_start_time_sec = young_end_time_sec; - - // Note we can use either _collection_set_size or - // _young_cset_length here - if (_collection_set_size > 0 && _last_young_gc_full) { - // don't bother adding more regions... 
- goto choose_collection_set_end; - } - } - - if (!in_young_gc_mode() || !full_young_gcs()) { + assert(_inc_cset_size == young_list->length(), "Invariant"); + + double young_end_time_sec = os::elapsedTime(); + _recorded_young_cset_choice_time_ms = + (young_end_time_sec - young_start_time_sec) * 1000.0; + + // We are doing young collections so reset this. + non_young_start_time_sec = young_end_time_sec; + + if (!full_young_gcs()) { bool should_continue = true; NumberSeq seq; double avg_prediction = 100000000000000000.0; // something very large + size_t prev_collection_set_size = _collection_set_size; + double prev_predicted_pause_time_ms = predicted_pause_time_ms; do { hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms, avg_prediction); @@ -2941,32 +3022,76 @@ predicted_pause_time_ms += predicted_time_ms; add_to_collection_set(hr); record_non_young_cset_region(hr); - max_live_bytes -= MIN2(hr->max_live_bytes(), max_live_bytes); - if (G1PolicyVerbose > 0) { - gclog_or_tty->print_cr(" (" SIZE_FORMAT " KB left in heap.)", - max_live_bytes/K); - } seq.add(predicted_time_ms); avg_prediction = seq.avg() + seq.sd(); } - should_continue = - ( hr != NULL) && - ( (adaptive_young_list_length()) ? time_remaining_ms > 0.0 - : _collection_set_size < _young_list_fixed_length ); + + should_continue = true; + if (hr == NULL) { + // No need for an ergo verbose message here, + // getNextMarkedRegion() does this when it returns NULL. 
+ should_continue = false; + } else { + if (adaptive_young_list_length()) { + if (time_remaining_ms < 0.0) { + ergo_verbose1(ErgoCSetConstruction, + "stop adding old regions to CSet", + ergo_format_reason("remaining time is lower than 0") + ergo_format_ms("remaining time"), + time_remaining_ms); + should_continue = false; + } + } else { + if (_collection_set_size < _young_list_fixed_length) { + ergo_verbose2(ErgoCSetConstruction, + "stop adding old regions to CSet", + ergo_format_reason("CSet length lower than target") + ergo_format_region("CSet") + ergo_format_region("young target"), + _collection_set_size, _young_list_fixed_length); + should_continue = false; + } + } + } } while (should_continue); if (!adaptive_young_list_length() && - _collection_set_size < _young_list_fixed_length) + _collection_set_size < _young_list_fixed_length) { + ergo_verbose2(ErgoCSetConstruction, + "request partially-young GCs end", + ergo_format_reason("CSet length lower than target") + ergo_format_region("CSet") + ergo_format_region("young target"), + _collection_set_size, _young_list_fixed_length); _should_revert_to_full_young_gcs = true; + } + + old_region_num = _collection_set_size - prev_collection_set_size; + + ergo_verbose2(ErgoCSetConstruction | ErgoHigh, + "add old regions to CSet", + ergo_format_region("old") + ergo_format_ms("predicted old region time"), + old_region_num, + predicted_pause_time_ms - prev_predicted_pause_time_ms); } -choose_collection_set_end: stop_incremental_cset_building(); count_CS_bytes_used(); end_recording_regions(); + ergo_verbose5(ErgoCSetConstruction, + "finish choosing CSet", + ergo_format_region("eden") + ergo_format_region("survivors") + ergo_format_region("old") + ergo_format_ms("predicted pause time") + ergo_format_ms("target pause time"), + eden_region_num, survivor_region_num, old_region_num, + predicted_pause_time_ms, target_pause_time_ms); + double non_young_end_time_sec = os::elapsedTime(); _recorded_non_young_cset_choice_time_ms = 
(non_young_end_time_sec - non_young_start_time_sec) * 1000.0; @@ -2978,12 +3103,6 @@ } void G1CollectorPolicy_BestRegionsFirst:: -expand_if_possible(size_t numRegions) { - size_t expansion_bytes = numRegions * HeapRegion::GrainBytes; - _g1->expand(expansion_bytes); -} - -void G1CollectorPolicy_BestRegionsFirst:: record_collection_pause_end() { G1CollectorPolicy::record_collection_pause_end(); assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end."); diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Fri Sep 09 12:44:37 2011 -0700 @@ -141,7 +141,6 @@ TruncatedSeq* _recent_rs_sizes; - TruncatedSeq* _concurrent_mark_init_times_ms; TruncatedSeq* _concurrent_mark_remark_times_ms; TruncatedSeq* _concurrent_mark_cleanup_times_ms; @@ -178,18 +177,15 @@ double* _par_last_gc_worker_end_times_ms; double* _par_last_gc_worker_times_ms; - // indicates that we are in young GC mode - bool _in_young_gc_mode; - // indicates whether we are in full young or partially young GC mode bool _full_young_gcs; // if true, then it tries to dynamically adjust the length of the // young list bool _adaptive_young_list_length; - size_t _young_list_min_length; size_t _young_list_target_length; size_t _young_list_fixed_length; + size_t _prev_eden_capacity; // used for logging // The max number of regions we can extend the eden by while the GC // locker is active. 
This should be >= _young_list_target_length; @@ -211,6 +207,9 @@ double _gc_overhead_perc; + double _reserve_factor; + size_t _reserve_regions; + bool during_marking() { return _during_marking; } @@ -247,6 +246,10 @@ TruncatedSeq* _max_conc_overhead_seq; + bool _using_new_ratio_calculations; + size_t _min_desired_young_length; // as set on the command line or default calculations + size_t _max_desired_young_length; // as set on the command line or default calculations + size_t _recorded_young_regions; size_t _recorded_non_young_regions; size_t _recorded_region_num; @@ -460,12 +463,6 @@ size_t predict_bytes_to_copy(HeapRegion* hr); double predict_region_elapsed_time_ms(HeapRegion* hr, bool young); - // for use by: calculate_young_list_target_length(rs_length) - bool predict_will_fit(size_t young_region_num, - double base_time_ms, - size_t init_free_regions, - double target_pause_time_ms); - void start_recording_regions(); void record_cset_region_info(HeapRegion* hr, bool young); void record_non_young_cset_region(HeapRegion* hr); @@ -497,7 +494,6 @@ // -public: void cset_regions_freed() { bool propagate = _last_young_gc_full && !_in_marking_window; _short_lived_surv_rate_group->all_surviving_words_recorded(propagate); @@ -527,10 +523,6 @@ return _mmu_tracker->max_gc_time() * 1000.0; } - double predict_init_time_ms() { - return get_new_prediction(_concurrent_mark_init_times_ms); - } - double predict_remark_time_ms() { return get_new_prediction(_concurrent_mark_remark_times_ms); } @@ -776,14 +768,45 @@ // This set of variables tracks the collector efficiency, in order to // determine whether we should initiate a new marking. 
double _cur_mark_stop_world_time_ms; - double _mark_init_start_sec; double _mark_remark_start_sec; double _mark_cleanup_start_sec; double _mark_closure_time_ms; - void calculate_young_list_min_length(); - void calculate_young_list_target_length(); - void calculate_young_list_target_length(size_t rs_lengths); + // Update the young list target length either by setting it to the + // desired fixed value or by calculating it using G1's pause + // prediction model. If no rs_lengths parameter is passed, predict + // the RS lengths using the prediction model, otherwise use the + // given rs_lengths as the prediction. + void update_young_list_target_length(size_t rs_lengths = (size_t) -1); + + // Calculate and return the minimum desired young list target + // length. This is the minimum desired young list length according + // to the user's inputs. + size_t calculate_young_list_desired_min_length(size_t base_min_length); + + // Calculate and return the maximum desired young list target + // length. This is the maximum desired young list length according + // to the user's inputs. + size_t calculate_young_list_desired_max_length(); + + // Calculate and return the maximum young list target length that + // can fit into the pause time goal. The parameters are: rs_lengths + // represent the prediction of how large the young RSet lengths will + // be, base_min_length is the already existing number of regions in + // the young list, min_length and max_length are the desired min and + // max young list length according to the user's inputs. + size_t calculate_young_list_target_length(size_t rs_lengths, + size_t base_min_length, + size_t desired_min_length, + size_t desired_max_length); + + // Check whether a given young length (young_length) fits into the + // given target pause time and whether the prediction for the amount + // of objects to be copied for the given length will fit into the + // given free space (expressed by base_free_regions).
It is used by + // calculate_young_list_target_length(). + bool predict_will_fit(size_t young_length, double base_time_ms, + size_t base_free_regions, double target_pause_time_ms); public: @@ -795,7 +818,10 @@ return CollectorPolicy::G1CollectorPolicyKind; } - void check_prediction_validity(); + // Check the current value of the young list RSet lengths and + // compare it against the last prediction. If the current value is + // higher, recalculate the young list target length prediction. + void revise_young_list_target_length_if_necessary(); size_t bytes_in_collection_set() { return _bytes_in_collection_set_before_gc; @@ -805,6 +831,9 @@ return _all_pause_times_ms->num() + 1; } + // This should be called after the heap is resized. + void record_new_heap_size(size_t new_number_of_regions); + protected: // Count the number of bytes used in the CS. @@ -816,6 +845,8 @@ size_t max_live_bytes); void record_concurrent_mark_cleanup_end_work2(); + void update_young_list_size_using_newratio(size_t number_of_heap_regions); + public: virtual void init(); @@ -849,9 +880,7 @@ size_t start_used); // Must currently be called while the world is stopped. - virtual void record_concurrent_mark_init_start(); - virtual void record_concurrent_mark_init_end(); - void record_concurrent_mark_init_end_pre(double + void record_concurrent_mark_init_end(double mark_init_elapsed_time_ms); void record_mark_closure_time(double mark_closure_time_ms); @@ -1056,7 +1085,7 @@ // new cycle, as long as we are not already in one. It's best if it // is called during a safepoint when the test whether a cycle is in // progress or not is stable. - bool force_initial_mark_if_outside_cycle(); + bool force_initial_mark_if_outside_cycle(GCCause::Cause gc_cause); // This is called at the very beginning of an evacuation pause (it // has to be the first thing that the pause does). 
If @@ -1101,30 +1130,17 @@ bool is_young_list_full() { size_t young_list_length = _g1->young_list()->length(); size_t young_list_target_length = _young_list_target_length; - if (G1FixedEdenSize) { - young_list_target_length -= _max_survivor_regions; - } return young_list_length >= young_list_target_length; } bool can_expand_young_list() { size_t young_list_length = _g1->young_list()->length(); size_t young_list_max_length = _young_list_max_length; - if (G1FixedEdenSize) { - young_list_max_length -= _max_survivor_regions; - } return young_list_length < young_list_max_length; } void update_region_num(bool young); - bool in_young_gc_mode() { - return _in_young_gc_mode; - } - void set_in_young_gc_mode(bool in_young_gc_mode) { - _in_young_gc_mode = in_young_gc_mode; - } - bool full_young_gcs() { return _full_young_gcs; } @@ -1228,10 +1244,10 @@ _survivors_age_table.merge_par(age_table); } - void calculate_max_gc_locker_expansion(); + void update_max_gc_locker_expansion(); // Calculates survivor space parameters. - void calculate_survivors_policy(); + void update_survivors_policy(); }; @@ -1258,8 +1274,6 @@ class G1CollectorPolicy_BestRegionsFirst: public G1CollectorPolicy { CollectionSetChooser* _collectionSetChooser; - // If the estimated is less then desirable, resize if possible. - void expand_if_possible(size_t numRegions); virtual void choose_collection_set(double target_pause_time_ms); virtual void record_collection_pause_start(double start_time_sec, @@ -1293,8 +1307,4 @@ return (sum_of_squares - 2.0 * avg * sum + n_d * avg * avg) / n_d; } -// Local Variables: *** -// c-indentation-style: gnu *** -// End: *** - #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1COLLECTORPOLICY_HPP diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/g1/g1ErgoVerbose.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1ErgoVerbose.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2011, Oracle and/or its affiliates. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "gc_implementation/g1/g1ErgoVerbose.hpp" +#include "utilities/ostream.hpp" + +ErgoLevel G1ErgoVerbose::_level; +bool G1ErgoVerbose::_enabled[ErgoHeuristicNum]; + +void G1ErgoVerbose::initialize() { + set_level(ErgoLow); + set_enabled(false); +} + +void G1ErgoVerbose::set_level(ErgoLevel level) { + _level = level; +} + +void G1ErgoVerbose::set_enabled(ErgoHeuristic n, bool enabled) { + assert(0 <= n && n < ErgoHeuristicNum, "pre-condition"); + _enabled[n] = enabled; +} + +void G1ErgoVerbose::set_enabled(bool enabled) { + for (int n = 0; n < ErgoHeuristicNum; n += 1) { + set_enabled((ErgoHeuristic) n, enabled); + } +} + +const char* G1ErgoVerbose::to_string(int tag) { + ErgoHeuristic n = extract_heuristic(tag); + switch (n) { + case ErgoHeapSizing: return "Heap Sizing"; + case ErgoCSetConstruction: return "CSet Construction"; + case ErgoConcCycles: return "Concurrent Cycles"; + case ErgoPartiallyYoungGCs: return "Partially-Young GCs"; + default: + ShouldNotReachHere(); + // Keep the Windows compiler happy + return NULL; + } +} + diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp Fri Sep 09 12:44:37 2011 -0700 @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1ERGOVERBOSE_HPP +#define SHARE_VM_GC_IMPLEMENTATION_G1_G1ERGOVERBOSE_HPP + +#include "memory/allocation.hpp" +#include "utilities/debug.hpp" + +// The log of G1's heuristic decisions comprises a series of +// records which have a similar format in order to maintain +// consistency across records and ultimately easier parsing of the +// output, if we ever choose to do that. Each record consists of: +// * A time stamp to be able to easily correlate each record with +// other events. +// * A unique string to allow us to easily identify such records. +// * The name of the heuristic the record corresponds to. +// * An action string which describes the action that G1 did or is +// about to do. +// * An optional reason string which describes the reason for the +// action. +// * An optional number of name/value pairs which contributed to the +// decision to take the action described in the record. +// +// Each record is associated with a "tag" which is the combination of +// the heuristic the record corresponds to, as well as the min level +// of verboseness at which the record should be printed. The tag is +// checked against the current settings to determine whether the record +// should be printed or not. + +// The available verboseness levels. +typedef enum { + // Determine which part of the tag is occupied by the level.
+ ErgoLevelShift = 8, + ErgoLevelMask = ~((1 << ErgoLevelShift) - 1), + + // ErgoLow is 0 so that we don't have to explicitly or a heuristic + // id with ErgoLow to keep its use simpler. + ErgoLow = 0, + ErgoHigh = 1 << ErgoLevelShift, +} ErgoLevel; + +// The available heuristics. +typedef enum { + // Determines which part of the tag is occupied by the heuristic id. + ErgoHeuristicMask = ~ErgoLevelMask, + + ErgoHeapSizing = 0, + ErgoCSetConstruction, + ErgoConcCycles, + ErgoPartiallyYoungGCs, + + ErgoHeuristicNum +} ErgoHeuristic; + +class G1ErgoVerbose : AllStatic { +private: + // Determines the minimum verboseness level at which records will be + // printed. + static ErgoLevel _level; + // Determines which heuristics are currently enabled. + static bool _enabled[ErgoHeuristicNum]; + + static ErgoLevel extract_level(int tag) { + return (ErgoLevel) (tag & ErgoLevelMask); + } + + static ErgoHeuristic extract_heuristic(int tag) { + return (ErgoHeuristic) (tag & ErgoHeuristicMask); + } + +public: + // Needs to be explicitly called at GC initialization. + static void initialize(); + + static void set_level(ErgoLevel level); + static void set_enabled(ErgoHeuristic h, bool enabled); + // It is applied to all heuristics. + static void set_enabled(bool enabled); + + static bool enabled(int tag) { + ErgoLevel level = extract_level(tag); + ErgoHeuristic n = extract_heuristic(tag); + return level <= _level && _enabled[n]; + } + + // Extract the heuristic id from the tag and return a string with + // its name. + static const char* to_string(int tag); +}; + +// The macros below generate the format string for values of different +// types and/or metrics. + +// The reason for the action is optional and is handled specially: the +// reason string is concatenated here so it's not necessary to pass it +// as a parameter. 
+#define ergo_format_reason(_reason_) ", reason: " _reason_ + +// Single parameter format strings +#define ergo_format_str(_name_) ", " _name_ ": %s" +#define ergo_format_region(_name_) ", " _name_ ": "SIZE_FORMAT" regions" +#define ergo_format_byte(_name_) ", " _name_ ": "SIZE_FORMAT" bytes" +#define ergo_format_double(_name_) ", " _name_ ": %1.2f" +#define ergo_format_perc(_name_) ", " _name_ ": %1.2f %%" +#define ergo_format_ms(_name_) ", " _name_ ": %1.2f ms" + +// Double parameter format strings +#define ergo_format_byte_perc(_name_) \ + ", " _name_ ": "SIZE_FORMAT" bytes (%1.2f %%)" + +// Generates the format string +#define ergo_format(_action_, _extra_format_) \ + " %1.3f: [G1Ergonomics (%s) " _action_ _extra_format_ "]" + +// Conditionally prints an ergonomic decision record. _extra_format_ +// is the format string for the optional items we'd like to print +// (i.e., the decision's reason and any associated values). This +// string should be built up using the ergo_format_* macros (see +// above) to ensure consistency. +// +// Since we cannot rely on the compiler supporting variable argument +// macros, this macro accepts a fixed number of arguments and passes +// them to the print method. For convenience, we have wrapper macros +// below which take a specific number of arguments and set the rest to +// a default value.
+#define ergo_verbose_common(_tag_, _action_, _extra_format_, \ + _arg0_, _arg1_, _arg2_, _arg3_, _arg4_, _arg5_) \ + do { \ + if (G1ErgoVerbose::enabled((_tag_))) { \ + gclog_or_tty->print_cr(ergo_format(_action_, _extra_format_), \ + os::elapsedTime(), \ + G1ErgoVerbose::to_string((_tag_)), \ + (_arg0_), (_arg1_), (_arg2_), \ + (_arg3_), (_arg4_), (_arg5_)); \ + } \ + } while (0) + + +#define ergo_verbose(_tag_, _action_) \ + ergo_verbose_common(_tag_, _action_, "", 0, 0, 0, 0, 0, 0) + +#define ergo_verbose0(_tag_, _action_, _extra_format_) \ + ergo_verbose_common(_tag_, _action_, _extra_format_, 0, 0, 0, 0, 0, 0) + +#define ergo_verbose1(_tag_, _action_, _extra_format_, \ + _arg0_) \ + ergo_verbose_common(_tag_, _action_, _extra_format_, \ + _arg0_, 0, 0, 0, 0, 0) + +#define ergo_verbose2(_tag_, _action_, _extra_format_, \ + _arg0_, _arg1_) \ + ergo_verbose_common(_tag_, _action_, _extra_format_, \ + _arg0_, _arg1_, 0, 0, 0, 0) + +#define ergo_verbose3(_tag_, _action_, _extra_format_, \ + _arg0_, _arg1_, _arg2_) \ + ergo_verbose_common(_tag_, _action_, _extra_format_, \ + _arg0_, _arg1_, _arg2_, 0, 0, 0) + +#define ergo_verbose4(_tag_, _action_, _extra_format_, \ + _arg0_, _arg1_, _arg2_, _arg3_) \ + ergo_verbose_common(_tag_, _action_, _extra_format_, \ + _arg0_, _arg1_, _arg2_, _arg3_, 0, 0) + +#define ergo_verbose5(_tag_, _action_, _extra_format_, \ + _arg0_, _arg1_, _arg2_, _arg3_, _arg4_) \ + ergo_verbose_common(_tag_, _action_, _extra_format_, \ + _arg0_, _arg1_, _arg2_, _arg3_, _arg4_, 0) + +#define ergo_verbose6(_tag_, _action_, _extra_format_, \ + _arg0_, _arg1_, _arg2_, _arg3_, _arg4_, _arg5_) \ + ergo_verbose_common(_tag_, _action_, _extra_format_, \ + _arg0_, _arg1_, _arg2_, _arg3_, _arg4_, _arg5_) + +#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1ERGOVERBOSE_HPP diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/g1/g1MMUTracker.cpp --- a/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp Thu Sep 08 09:35:41 2011 +0200 +++ 
b/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -97,10 +97,6 @@ // or performance (we are GC'ing most of the time anyway!), // simply overwrite the oldest entry in the tracker. - if (G1PolicyVerbose > 1) { - warning("MMU Tracker Queue overflow. Replacing earliest entry."); - } - _head_index = trim_index(_head_index + 1); assert(_head_index == _tail_index, "Because we have a full circular buffer"); _tail_index = trim_index(_tail_index + 1); diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/g1/g1OopClosures.hpp --- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp Fri Sep 09 12:44:37 2011 -0700 @@ -50,6 +50,8 @@ G1RemSet* _g1_rem; ConcurrentMark* _cm; G1ParScanThreadState* _par_scan_state; + bool _during_initial_mark; + bool _mark_in_progress; public: G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state); bool apply_to_weak_ref_discovered_field() { return true; } @@ -102,8 +104,8 @@ class G1ParCopyHelper : public G1ParClosureSuper { G1ParScanClosure *_scanner; protected: - template void mark_forwardee(T* p); - oop copy_to_survivor_space(oop obj); + template void mark_object(T* p); + oop copy_to_survivor_space(oop obj, bool should_mark_copy); public: G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state, G1ParScanClosure *scanner) : @@ -111,7 +113,7 @@ }; template + bool do_mark_object> class G1ParCopyClosure : public G1ParCopyHelper { G1ParScanClosure _scanner; template void do_oop_work(T* p); @@ -120,8 +122,6 @@ _scanner(g1, par_scan_state), G1ParCopyHelper(g1, 
par_scan_state, &_scanner) { } template void do_oop_nv(T* p) { do_oop_work(p); - if (do_mark_forwardee) - mark_forwardee(p); } virtual void do_oop(oop* p) { do_oop_nv(p); } virtual void do_oop(narrowOop* p) { do_oop_nv(p); } diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/g1/g1_globals.hpp --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Fri Sep 09 12:44:37 2011 -0700 @@ -39,8 +39,6 @@ develop(intx, G1MarkingOverheadPercent, 0, \ "Overhead of concurrent marking") \ \ - develop(bool, G1Gen, true, \ - "If true, it will enable the generational G1") \ \ develop(intx, G1PolicyVerbose, 0, \ "The verbosity level on G1 policy decisions") \ @@ -126,9 +124,6 @@ develop(bool, G1RSBarrierNullFilter, true, \ "If true, generate null-pointer filtering code in RS barrier") \ \ - develop(bool, G1PrintCTFilterStats, false, \ - "If true, print stats on RS filtering effectiveness") \ - \ develop(bool, G1DeferredRSUpdate, true, \ "If true, use deferred RS updates") \ \ @@ -139,9 +134,9 @@ develop(bool, G1RSCountHisto, false, \ "If true, print a histogram of RS occupancies after each pause") \ \ - product(bool, G1PrintRegionLivenessInfo, false, \ - "Prints the liveness information for all regions in the heap " \ - "at the end of a marking cycle.") \ + diagnostic(bool, G1PrintRegionLivenessInfo, false, \ + "Prints the liveness information for all regions in the heap " \ + "at the end of a marking cycle.") \ \ develop(bool, G1PrintParCleanupStats, false, \ "When true, print extra stats about parallel cleanup.") \ @@ -233,7 +228,7 @@ "the number of regions for which we'll print a surv rate " \ "summary.") \ \ - product(intx, G1ReservePercent, 10, \ + product(uintx, G1ReservePercent, 10, \ "It determines the minimum reserve we should have in the heap " \ "to minimize the probability of promotion failure.") \ \ @@ -251,16 +246,6 @@ "When set, G1 will fail when it encounters an FP 
'error', " \ "so as to allow debugging") \ \ - develop(bool, G1FixedTenuringThreshold, false, \ - "When set, G1 will not adjust the tenuring threshold") \ - \ - develop(bool, G1FixedEdenSize, false, \ - "When set, G1 will not allocate unused survivor space regions") \ - \ - develop(uintx, G1FixedSurvivorSpaceSize, 0, \ - "If non-0 is the size of the G1 survivor space, " \ - "otherwise SurvivorRatio is used to determine the size") \ - \ product(uintx, G1HeapRegionSize, 0, \ "Size of the G1 regions.") \ \ diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp --- a/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp Fri Sep 09 12:44:37 2011 -0700 @@ -36,7 +36,7 @@ }; template + bool do_mark_object> class G1ParCopyClosure; class G1ParScanClosure; class G1ParPushHeapRSClosure; diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/g1/vm_operations_g1.cpp --- a/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -98,7 +98,7 @@ // At this point we are supposed to start a concurrent cycle. We // will do so if one is not already in progress. 
- bool res = g1h->g1_policy()->force_initial_mark_if_outside_cycle(); + bool res = g1h->g1_policy()->force_initial_mark_if_outside_cycle(_gc_cause); // The above routine returns true if we were able to force the // next GC pause to be an initial mark; it returns false if a diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -909,10 +909,6 @@ } young_gen()->verify(allow_dirty); } - if (!silent) { - gclog_or_tty->print("ref_proc "); - } - ReferenceProcessor::verify(); } void ParallelScavengeHeap::print_heap_change(size_t prev_used) { diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -80,10 +80,6 @@ Universe::oops_do(&mark_and_push_closure); break; - case reference_processing: - ReferenceProcessor::oops_do(&mark_and_push_closure); - break; - case jni_handles: JNIHandles::oops_do(&mark_and_push_closure); break; diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp Fri Sep 09 12:44:37 2011 -0700 @@ -98,8 +98,7 @@ management = 6, jvmti = 7, system_dictionary = 8, - reference_processing = 9, - code_cache = 10 + code_cache = 9 }; private: RootType _root_type; diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Thu Sep 08 09:35:41 2011 
+0200 +++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -516,7 +516,6 @@ { ParallelScavengeHeap::ParStrongRootsScope psrs; Universe::oops_do(mark_and_push_closure()); - ReferenceProcessor::oops_do(mark_and_push_closure()); JNIHandles::oops_do(mark_and_push_closure()); // Global (strong) JNI handles CodeBlobToOopClosure each_active_code_blob(mark_and_push_closure(), /*do_marking=*/ true); Threads::oops_do(mark_and_push_closure(), &each_active_code_blob); @@ -623,7 +622,6 @@ // General strong roots. Universe::oops_do(adjust_root_pointer_closure()); - ReferenceProcessor::oops_do(adjust_root_pointer_closure()); JNIHandles::oops_do(adjust_root_pointer_closure()); // Global (strong) JNI handles Threads::oops_do(adjust_root_pointer_closure(), NULL); ObjectSynchronizer::oops_do(adjust_root_pointer_closure()); diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -2445,7 +2445,6 @@ // General strong roots. 
Universe::oops_do(adjust_root_pointer_closure()); - ReferenceProcessor::oops_do(adjust_root_pointer_closure()); JNIHandles::oops_do(adjust_root_pointer_closure()); // Global (strong) JNI handles Threads::oops_do(adjust_root_pointer_closure(), NULL); ObjectSynchronizer::oops_do(adjust_root_pointer_closure()); diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -55,7 +55,6 @@ switch (_root_type) { case universe: Universe::oops_do(&roots_closure); - ReferenceProcessor::oops_do(&roots_closure); break; case jni_handles: diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/gc_interface/collectedHeap.cpp --- a/src/share/vm/gc_interface/collectedHeap.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/gc_interface/collectedHeap.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -410,13 +410,13 @@ void CollectedHeap::pre_full_gc_dump() { if (HeapDumpBeforeFullGC) { - TraceTime tt("Heap Dump: ", PrintGCDetails, false, gclog_or_tty); + TraceTime tt("Heap Dump (before full gc): ", PrintGCDetails, false, gclog_or_tty); // We are doing a "major" collection and a heap dump before // major collection has been requested. HeapDumper::dump_heap(); } if (PrintClassHistogramBeforeFullGC) { - TraceTime tt("Class Histogram: ", PrintGCDetails, true, gclog_or_tty); + TraceTime tt("Class Histogram (before full gc): ", PrintGCDetails, true, gclog_or_tty); VM_GC_HeapInspection inspector(gclog_or_tty, false /* ! full gc */, false /* ! 
prologue */); inspector.doit(); } @@ -424,11 +424,11 @@ void CollectedHeap::post_full_gc_dump() { if (HeapDumpAfterFullGC) { - TraceTime tt("Heap Dump", PrintGCDetails, false, gclog_or_tty); + TraceTime tt("Heap Dump (after full gc): ", PrintGCDetails, false, gclog_or_tty); HeapDumper::dump_heap(); } if (PrintClassHistogramAfterFullGC) { - TraceTime tt("Class Histogram", PrintGCDetails, true, gclog_or_tty); + TraceTime tt("Class Histogram (after full gc): ", PrintGCDetails, true, gclog_or_tty); VM_GC_HeapInspection inspector(gclog_or_tty, false /* ! full gc */, false /* ! prologue */); inspector.doit(); } diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/interpreter/interpreterRuntime.cpp --- a/src/share/vm/interpreter/interpreterRuntime.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/interpreter/interpreterRuntime.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -1244,7 +1244,7 @@ // preparing the same method will be sure to see non-null entry & mirror. IRT_END -#if defined(IA32) || defined(AMD64) +#if defined(IA32) || defined(AMD64) || defined(ARM) IRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address)) if (src_address == dest_address) { return; diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/interpreter/interpreterRuntime.hpp --- a/src/share/vm/interpreter/interpreterRuntime.hpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/interpreter/interpreterRuntime.hpp Fri Sep 09 12:44:37 2011 -0700 @@ -141,8 +141,8 @@ methodOopDesc* method, intptr_t* from, intptr_t* to); -#if defined(IA32) || defined(AMD64) - // Popframe support (only needed on x86 and AMD64) +#if defined(IA32) || defined(AMD64) || defined(ARM) + // Popframe support (only needed on x86, AMD64 and ARM) static void popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address); #endif diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/memory/genCollectedHeap.cpp --- a/src/share/vm/memory/genCollectedHeap.cpp Thu Sep 
08 09:35:41 2011 +0200 +++ b/src/share/vm/memory/genCollectedHeap.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -1269,10 +1269,6 @@ gclog_or_tty->print("remset "); } rem_set()->verify(); - if (!silent) { - gclog_or_tty->print("ref_proc "); - } - ReferenceProcessor::verify(); } void GenCollectedHeap::print() const { print_on(tty); } diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/memory/referenceProcessor.cpp --- a/src/share/vm/memory/referenceProcessor.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/memory/referenceProcessor.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -35,15 +35,15 @@ ReferencePolicy* ReferenceProcessor::_always_clear_soft_ref_policy = NULL; ReferencePolicy* ReferenceProcessor::_default_soft_ref_policy = NULL; -oop ReferenceProcessor::_sentinelRef = NULL; const int subclasses_of_ref = REF_PHANTOM - REF_OTHER; +bool ReferenceProcessor::_pending_list_uses_discovered_field = false; // List of discovered references. class DiscoveredList { public: DiscoveredList() : _len(0), _compressed_head(0), _oop_head(NULL) { } oop head() const { - return UseCompressedOops ? oopDesc::decode_heap_oop_not_null(_compressed_head) : + return UseCompressedOops ? oopDesc::decode_heap_oop(_compressed_head) : _oop_head; } HeapWord* adr_head() { @@ -53,12 +53,12 @@ void set_head(oop o) { if (UseCompressedOops) { // Must compress the head ptr. 
- _compressed_head = oopDesc::encode_heap_oop_not_null(o); + _compressed_head = oopDesc::encode_heap_oop(o); } else { _oop_head = o; } } - bool empty() const { return head() == ReferenceProcessor::sentinel_ref(); } + bool empty() const { return head() == NULL; } size_t length() { return _len; } void set_length(size_t len) { _len = len; } void inc_length(size_t inc) { _len += inc; assert(_len > 0, "Error"); } @@ -76,21 +76,9 @@ } void ReferenceProcessor::init_statics() { - assert(_sentinelRef == NULL, "should be initialized precisely once"); - EXCEPTION_MARK; - _sentinelRef = instanceKlass::cast( - SystemDictionary::Reference_klass())-> - allocate_permanent_instance(THREAD); - // Initialize the master soft ref clock. java_lang_ref_SoftReference::set_clock(os::javaTimeMillis()); - if (HAS_PENDING_EXCEPTION) { - Handle ex(THREAD, PENDING_EXCEPTION); - vm_exit_during_initialization(ex); - } - assert(_sentinelRef != NULL && _sentinelRef->is_oop(), - "Just constructed it!"); _always_clear_soft_ref_policy = new AlwaysClearPolicy(); _default_soft_ref_policy = new COMPILER2_PRESENT(LRUMaxHeapPolicy()) NOT_COMPILER2(LRUCurrentHeapPolicy()); @@ -100,6 +88,7 @@ guarantee(RefDiscoveryPolicy == ReferenceBasedDiscovery || RefDiscoveryPolicy == ReferentBasedDiscovery, "Unrecongnized RefDiscoveryPolicy"); + _pending_list_uses_discovered_field = JDK_Version::current().pending_list_uses_discovered_field(); } ReferenceProcessor::ReferenceProcessor(MemRegion span, @@ -130,13 +119,12 @@ _discoveredWeakRefs = &_discoveredSoftRefs[_max_num_q]; _discoveredFinalRefs = &_discoveredWeakRefs[_max_num_q]; _discoveredPhantomRefs = &_discoveredFinalRefs[_max_num_q]; - assert(sentinel_ref() != NULL, "_sentinelRef is NULL"); - // Initialized all entries to _sentinelRef + // Initialized all entries to NULL for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { - _discoveredSoftRefs[i].set_head(sentinel_ref()); + _discoveredSoftRefs[i].set_head(NULL); _discoveredSoftRefs[i].set_length(0); } - // 
If we do barreirs, cache a copy of the barrier set. + // If we do barriers, cache a copy of the barrier set. if (discovered_list_needs_barrier) { _bs = Universe::heap()->barrier_set(); } @@ -167,10 +155,6 @@ } } -void ReferenceProcessor::oops_do(OopClosure* f) { - f->do_oop(adr_sentinel_ref()); -} - void ReferenceProcessor::update_soft_ref_master_clock() { // Update (advance) the soft ref master clock field. This must be done // after processing the soft ref list. @@ -283,8 +267,6 @@ } #endif JNIHandles::weak_oops_do(is_alive, keep_alive); - // Finally remember to keep sentinel around - keep_alive->do_oop(adr_sentinel_ref()); complete_gc->do_void(); } @@ -327,46 +309,77 @@ void ReferenceProcessor::enqueue_discovered_reflist(DiscoveredList& refs_list, HeapWord* pending_list_addr) { // Given a list of refs linked through the "discovered" field - // (java.lang.ref.Reference.discovered) chain them through the - // "next" field (java.lang.ref.Reference.next) and prepend - // to the pending list. + // (java.lang.ref.Reference.discovered), self-loop their "next" field + // thus distinguishing them from active References, then + // prepend them to the pending list. + // BKWRD COMPATIBILITY NOTE: For older JDKs (prior to the fix for 4956777), + // the "next" field is used to chain the pending list, not the discovered + // field. + if (TraceReferenceGC && PrintGCDetails) { gclog_or_tty->print_cr("ReferenceProcessor::enqueue_discovered_reflist list " INTPTR_FORMAT, (address)refs_list.head()); } - oop obj = refs_list.head(); - // Walk down the list, copying the discovered field into - // the next field and clearing it (except for the last - // non-sentinel object which is treated specially to avoid - // confusion with an active reference). 
- while (obj != sentinel_ref()) { - assert(obj->is_instanceRef(), "should be reference object"); - oop next = java_lang_ref_Reference::discovered(obj); - if (TraceReferenceGC && PrintGCDetails) { - gclog_or_tty->print_cr(" obj " INTPTR_FORMAT "/next " INTPTR_FORMAT, - obj, next); + + oop obj = NULL; + oop next_d = refs_list.head(); + if (pending_list_uses_discovered_field()) { // New behaviour + // Walk down the list, self-looping the next field + // so that the References are not considered active. + while (obj != next_d) { + obj = next_d; + assert(obj->is_instanceRef(), "should be reference object"); + next_d = java_lang_ref_Reference::discovered(obj); + if (TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print_cr(" obj " INTPTR_FORMAT "/next_d " INTPTR_FORMAT, + obj, next_d); + } + assert(java_lang_ref_Reference::next(obj) == NULL, + "Reference not active; should not be discovered"); + // Self-loop next, so as to make Ref not active. + java_lang_ref_Reference::set_next(obj, obj); + if (next_d == obj) { // obj is last + // Swap refs_list into pending_list_addr and + // set obj's discovered to what we read from pending_list_addr. + oop old = oopDesc::atomic_exchange_oop(refs_list.head(), pending_list_addr); + // Need oop_check on pending_list_addr above; + // see special oop-check code at the end of + // enqueue_discovered_reflists() further below. + java_lang_ref_Reference::set_discovered(obj, old); // old may be NULL + } } - assert(java_lang_ref_Reference::next(obj) == NULL, - "The reference should not be enqueued"); - if (next == sentinel_ref()) { // obj is last - // Swap refs_list into pendling_list_addr and - // set obj's next to what we read from pending_list_addr. - oop old = oopDesc::atomic_exchange_oop(refs_list.head(), pending_list_addr); - // Need oop_check on pending_list_addr above; - // see special oop-check code at the end of - // enqueue_discovered_reflists() further below.
- if (old == NULL) { - // obj should be made to point to itself, since - // pending list was empty. - java_lang_ref_Reference::set_next(obj, obj); + } else { // Old behaviour + // Walk down the list, copying the discovered field into + // the next field and clearing the discovered field. + while (obj != next_d) { + obj = next_d; + assert(obj->is_instanceRef(), "should be reference object"); + next_d = java_lang_ref_Reference::discovered(obj); + if (TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print_cr(" obj " INTPTR_FORMAT "/next_d " INTPTR_FORMAT, + obj, next_d); + } + assert(java_lang_ref_Reference::next(obj) == NULL, + "The reference should not be enqueued"); + if (next_d == obj) { // obj is last + // Swap refs_list into pending_list_addr and + // set obj's next to what we read from pending_list_addr. + oop old = oopDesc::atomic_exchange_oop(refs_list.head(), pending_list_addr); + // Need oop_check on pending_list_addr above; + // see special oop-check code at the end of + // enqueue_discovered_reflists() further below. + if (old == NULL) { + // obj should be made to point to itself, since + // pending list was empty.
+ java_lang_ref_Reference::set_next(obj, obj); + } else { + java_lang_ref_Reference::set_next(obj, old); + } } else { - java_lang_ref_Reference::set_next(obj, old); + java_lang_ref_Reference::set_next(obj, next_d); } - } else { - java_lang_ref_Reference::set_next(obj, next); + java_lang_ref_Reference::set_discovered(obj, (oop) NULL); } - java_lang_ref_Reference::set_discovered(obj, (oop) NULL); - obj = next; } } @@ -376,10 +389,9 @@ RefProcEnqueueTask(ReferenceProcessor& ref_processor, DiscoveredList discovered_refs[], HeapWord* pending_list_addr, - oop sentinel_ref, int n_queues) : EnqueueTask(ref_processor, discovered_refs, - pending_list_addr, sentinel_ref, n_queues) + pending_list_addr, n_queues) { } virtual void work(unsigned int work_id) { @@ -396,7 +408,7 @@ j++, index += _n_queues) { _ref_processor.enqueue_discovered_reflist( _refs_lists[index], _pending_list_addr); - _refs_lists[index].set_head(_sentinel_ref); + _refs_lists[index].set_head(NULL); _refs_lists[index].set_length(0); } } @@ -408,13 +420,13 @@ if (_processing_is_mt && task_executor != NULL) { // Parallel code RefProcEnqueueTask tsk(*this, _discoveredSoftRefs, - pending_list_addr, sentinel_ref(), _max_num_q); + pending_list_addr, _max_num_q); task_executor->execute(tsk); } else { // Serial code: call the parent class's implementation for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { enqueue_discovered_reflist(_discoveredSoftRefs[i], pending_list_addr); - _discoveredSoftRefs[i].set_head(sentinel_ref()); + _discoveredSoftRefs[i].set_head(NULL); _discoveredSoftRefs[i].set_length(0); } } @@ -428,7 +440,7 @@ BoolObjectClosure* is_alive); // End Of List. - inline bool has_next() const { return _next != ReferenceProcessor::sentinel_ref(); } + inline bool has_next() const { return _ref != NULL; } // Get oop to the Reference object. inline oop obj() const { return _ref; } @@ -468,9 +480,13 @@ inline void update_discovered() { // First _prev_next ref actually points into DiscoveredList (gross). 
if (UseCompressedOops) { - _keep_alive->do_oop((narrowOop*)_prev_next); + if (!oopDesc::is_null(*(narrowOop*)_prev_next)) { + _keep_alive->do_oop((narrowOop*)_prev_next); + } } else { - _keep_alive->do_oop((oop*)_prev_next); + if (!oopDesc::is_null(*(oop*)_prev_next)) { + _keep_alive->do_oop((oop*)_prev_next); + } } } @@ -488,6 +504,7 @@ private: DiscoveredList& _refs_list; HeapWord* _prev_next; + oop _prev; oop _ref; HeapWord* _discovered_addr; oop _next; @@ -509,6 +526,7 @@ BoolObjectClosure* is_alive) : _refs_list(refs_list), _prev_next(refs_list.adr_head()), + _prev(NULL), _ref(refs_list.head()), #ifdef ASSERT _first_seen(refs_list.head()), @@ -517,7 +535,7 @@ _processed(0), _removed(0), #endif - _next(refs_list.head()), + _next(NULL), _keep_alive(keep_alive), _is_alive(is_alive) { } @@ -544,26 +562,43 @@ inline void DiscoveredListIterator::next() { _prev_next = _discovered_addr; + _prev = _ref; move_to_next(); } inline void DiscoveredListIterator::remove() { assert(_ref->is_oop(), "Dropping a bad reference"); oop_store_raw(_discovered_addr, NULL); + // First _prev_next ref actually points into DiscoveredList (gross). + oop new_next; + if (_next == _ref) { + // At the end of the list, we should make _prev point to itself. + // If _ref is the first ref, then _prev_next will be in the DiscoveredList, + // and _prev will be NULL. + new_next = _prev; + } else { + new_next = _next; + } + if (UseCompressedOops) { // Remove Reference object from list. - oopDesc::encode_store_heap_oop_not_null((narrowOop*)_prev_next, _next); + oopDesc::encode_store_heap_oop((narrowOop*)_prev_next, new_next); } else { // Remove Reference object from list. - oopDesc::store_heap_oop((oop*)_prev_next, _next); + oopDesc::store_heap_oop((oop*)_prev_next, new_next); } NOT_PRODUCT(_removed++); _refs_list.dec_length(1); } inline void DiscoveredListIterator::move_to_next() { - _ref = _next; + if (_ref == _next) { + // End of the list. 
+ _ref = NULL; + } else { + _ref = _next; + } assert(_ref != _first_seen, "cyclic ref_list found"); NOT_PRODUCT(_processed++); } @@ -613,7 +648,7 @@ NOT_PRODUCT( if (PrintGCDetails && TraceReferenceGC) { gclog_or_tty->print_cr(" Dropped %d dead Refs out of %d " - "discovered Refs by policy list " INTPTR_FORMAT, + "discovered Refs by policy, from list " INTPTR_FORMAT, iter.removed(), iter.processed(), (address)refs_list.head()); } ) @@ -725,24 +760,30 @@ assert(iter.obj()->is_oop(UseConcMarkSweepGC), "Adding a bad reference"); iter.next(); } - // Remember to keep sentinel pointer around + // Remember to update the next pointer of the last ref. iter.update_discovered(); // Close the reachable set complete_gc->do_void(); } void -ReferenceProcessor::abandon_partial_discovered_list(DiscoveredList& refs_list) { - oop obj = refs_list.head(); - while (obj != sentinel_ref()) { - oop discovered = java_lang_ref_Reference::discovered(obj); +ReferenceProcessor::clear_discovered_references(DiscoveredList& refs_list) { + oop obj = NULL; + oop next = refs_list.head(); + while (next != obj) { + obj = next; + next = java_lang_ref_Reference::discovered(obj); java_lang_ref_Reference::set_discovered_raw(obj, NULL); - obj = discovered; } - refs_list.set_head(sentinel_ref()); + refs_list.set_head(NULL); refs_list.set_length(0); } +void +ReferenceProcessor::abandon_partial_discovered_list(DiscoveredList& refs_list) { + clear_discovered_references(refs_list); +} + void ReferenceProcessor::abandon_partial_discovery() { // loop over the lists for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { @@ -859,6 +900,9 @@ refs_to_move = MIN2(ref_lists[from_idx].length() - avg_refs, avg_refs - ref_lists[to_idx].length()); } + + assert(refs_to_move > 0, "otherwise the code below will fail"); + oop move_head = ref_lists[from_idx].head(); oop move_tail = move_head; oop new_head = move_head; @@ -867,10 +911,24 @@ move_tail = new_head; new_head = java_lang_ref_Reference::discovered(new_head); } - 
java_lang_ref_Reference::set_discovered(move_tail, ref_lists[to_idx].head()); + + // Add the chain to the to list. + if (ref_lists[to_idx].head() == NULL) { + // to list is empty. Make a loop at the end. + java_lang_ref_Reference::set_discovered(move_tail, move_tail); + } else { + java_lang_ref_Reference::set_discovered(move_tail, ref_lists[to_idx].head()); + } ref_lists[to_idx].set_head(move_head); ref_lists[to_idx].inc_length(refs_to_move); - ref_lists[from_idx].set_head(new_head); + + // Remove the chain from the from list. + if (move_tail == new_head) { + // We found the end of the from list. + ref_lists[from_idx].set_head(NULL); + } else { + ref_lists[from_idx].set_head(new_head); + } ref_lists[from_idx].dec_length(refs_to_move); if (ref_lists[from_idx].length() == 0) { break; @@ -1082,42 +1140,40 @@ // First we must make sure this object is only enqueued once. CAS in a non null // discovered_addr. oop current_head = refs_list.head(); + // The last ref must have its discovered field pointing to itself. + oop next_discovered = (current_head != NULL) ? current_head : obj; // Note: In the case of G1, this specific pre-barrier is strictly // not necessary because the only case we are interested in // here is when *discovered_addr is NULL (see the CAS further below), // so this will expand to nothing. As a result, we have manually // elided this out for G1, but left in the test for some future - // collector that might have need for a pre-barrier here. 
- if (_discovered_list_needs_barrier && !UseG1GC) { - if (UseCompressedOops) { - _bs->write_ref_field_pre((narrowOop*)discovered_addr, current_head); - } else { - _bs->write_ref_field_pre((oop*)discovered_addr, current_head); - } - guarantee(false, "Need to check non-G1 collector"); - } - oop retest = oopDesc::atomic_compare_exchange_oop(current_head, discovered_addr, + // collector that might have need for a pre-barrier here, e.g.:- + // _bs->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered); + assert(!_discovered_list_needs_barrier || UseG1GC, + "Need to check non-G1 collector: " + "may need a pre-write-barrier for CAS from NULL below"); + oop retest = oopDesc::atomic_compare_exchange_oop(next_discovered, discovered_addr, NULL); if (retest == NULL) { // This thread just won the right to enqueue the object. - // We have separate lists for enqueueing so no synchronization + // We have separate lists for enqueueing, so no synchronization // is necessary. refs_list.set_head(obj); refs_list.inc_length(1); if (_discovered_list_needs_barrier) { - _bs->write_ref_field((void*)discovered_addr, current_head); + _bs->write_ref_field((void*)discovered_addr, next_discovered); } if (TraceReferenceGC) { - gclog_or_tty->print_cr("Enqueued reference (mt) (" INTPTR_FORMAT ": %s)", + gclog_or_tty->print_cr("Discovered reference (mt) (" INTPTR_FORMAT ": %s)", obj, obj->blueprint()->internal_name()); } } else { // If retest was non NULL, another thread beat us to it: // The reference has already been discovered... if (TraceReferenceGC) { - gclog_or_tty->print_cr("Already enqueued reference (" INTPTR_FORMAT ": %s)", + gclog_or_tty->print_cr("Already discovered reference (" INTPTR_FORMAT ": %s)", obj, obj->blueprint()->internal_name()); } } @@ -1142,7 +1198,7 @@ // (or part of the heap being collected, indicated by our "span" // we don't treat it specially (i.e. we scan it as we would // a normal oop, treating its references as strong references). 
-// This means that references can't be enqueued unless their +// This means that references can't be discovered unless their // referent is also in the same span. This is the simplest, // most "local" and most conservative approach, albeit one // that may cause weak references to be enqueued least promptly. @@ -1164,14 +1220,13 @@ // and complexity in processing these references. // We call this choice the "RefeferentBasedDiscovery" policy. bool ReferenceProcessor::discover_reference(oop obj, ReferenceType rt) { - // We enqueue references only if we are discovering refs - // (rather than processing discovered refs). + // Make sure we are discovering refs (rather than processing discovered refs). if (!_discovering_refs || !RegisterReferences) { return false; } - // We only enqueue active references. + // We only discover active references. oop next = java_lang_ref_Reference::next(obj); - if (next != NULL) { + if (next != NULL) { // Ref is no longer active return false; } @@ -1184,8 +1239,8 @@ return false; } - // We only enqueue references whose referents are not (yet) strongly - // reachable. + // We only discover references whose referents are not (yet) + // known to be strongly reachable. if (is_alive_non_header() != NULL) { verify_referent(obj); if (is_alive_non_header()->do_object_b(java_lang_ref_Reference::referent(obj))) { @@ -1211,7 +1266,7 @@ if (discovered != NULL) { // The reference has already been discovered... if (TraceReferenceGC) { - gclog_or_tty->print_cr("Already enqueued reference (" INTPTR_FORMAT ": %s)", + gclog_or_tty->print_cr("Already discovered reference (" INTPTR_FORMAT ": %s)", obj, obj->blueprint()->internal_name()); } if (RefDiscoveryPolicy == ReferentBasedDiscovery) { @@ -1233,9 +1288,9 @@ if (RefDiscoveryPolicy == ReferentBasedDiscovery) { verify_referent(obj); - // enqueue if and only if either: - // reference is in our span or - // we are an atomic collector and referent is in our span + // Discover if and only if EITHER: + // .. 
reference is in our span, OR + // .. we are an atomic collector and referent is in our span if (_span.contains(obj_addr) || (discovery_is_atomic() && _span.contains(java_lang_ref_Reference::referent(obj)))) { @@ -1262,30 +1317,28 @@ // here: the field will be visited later when processing the discovered // references. oop current_head = list->head(); + // The last ref must have its discovered field pointing to itself. + oop next_discovered = (current_head != NULL) ? current_head : obj; + // As in the case further above, since we are over-writing a NULL // pre-value, we can safely elide the pre-barrier here for the case of G1. + // e.g.:- _bs->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered); assert(discovered == NULL, "control point invariant"); - if (_discovered_list_needs_barrier && !UseG1GC) { // safe to elide for G1 - if (UseCompressedOops) { - _bs->write_ref_field_pre((narrowOop*)discovered_addr, current_head); - } else { - _bs->write_ref_field_pre((oop*)discovered_addr, current_head); - } - guarantee(false, "Need to check non-G1 collector"); - } - oop_store_raw(discovered_addr, current_head); + assert(!_discovered_list_needs_barrier || UseG1GC, + "For non-G1 collector, may need a pre-write-barrier for CAS from NULL below"); + oop_store_raw(discovered_addr, next_discovered); if (_discovered_list_needs_barrier) { - _bs->write_ref_field((void*)discovered_addr, current_head); + _bs->write_ref_field((void*)discovered_addr, next_discovered); } list->set_head(obj); list->inc_length(1); if (TraceReferenceGC) { - gclog_or_tty->print_cr("Enqueued reference (" INTPTR_FORMAT ": %s)", + gclog_or_tty->print_cr("Discovered reference (" INTPTR_FORMAT ": %s)", obj, obj->blueprint()->internal_name()); } } - assert(obj->is_oop(), "Enqueued a bad reference"); + assert(obj->is_oop(), "Discovered a bad reference"); verify_referent(obj); return true; } @@ -1437,22 +1490,12 @@ } #endif -void ReferenceProcessor::verify() { - guarantee(sentinel_ref() != NULL 
&& sentinel_ref()->is_oop(), "Lost _sentinelRef"); -} - #ifndef PRODUCT void ReferenceProcessor::clear_discovered_references() { guarantee(!_discovering_refs, "Discovering refs?"); for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { - oop obj = _discoveredSoftRefs[i].head(); - while (obj != sentinel_ref()) { - oop next = java_lang_ref_Reference::discovered(obj); - java_lang_ref_Reference::set_discovered(obj, (oop) NULL); - obj = next; - } - _discoveredSoftRefs[i].set_head(sentinel_ref()); - _discoveredSoftRefs[i].set_length(0); + clear_discovered_references(_discoveredSoftRefs[i]); } } + #endif // PRODUCT diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/memory/referenceProcessor.hpp --- a/src/share/vm/memory/referenceProcessor.hpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/memory/referenceProcessor.hpp Fri Sep 09 12:44:37 2011 -0700 @@ -52,8 +52,8 @@ class ReferenceProcessor : public CHeapObj { protected: - // End of list marker - static oop _sentinelRef; + // Compatibility with pre-4965777 JDK's + static bool _pending_list_uses_discovered_field; MemRegion _span; // (right-open) interval of heap // subject to wkref discovery bool _discovering_refs; // true when discovery enabled @@ -106,8 +106,6 @@ int max_num_q() { return _max_num_q; } void set_active_mt_degree(int v) { _num_q = v; } DiscoveredList* discovered_soft_refs() { return _discoveredSoftRefs; } - static oop sentinel_ref() { return _sentinelRef; } - static oop* adr_sentinel_ref() { return &_sentinelRef; } ReferencePolicy* setup_policy(bool always_clear) { _current_soft_ref_policy = always_clear ? _always_clear_soft_ref_policy : _default_soft_ref_policy; @@ -115,7 +113,6 @@ return _current_soft_ref_policy; } - public: // Process references with a certain reachability level. 
void process_discovered_reflist(DiscoveredList refs_lists[], ReferencePolicy* policy, @@ -230,6 +227,7 @@ HeapWord* discovered_addr); void verify_ok_to_handle_reflists() PRODUCT_RETURN; + void clear_discovered_references(DiscoveredList& refs_list); void abandon_partial_discovered_list(DiscoveredList& refs_list); // Calculate the number of jni handles. @@ -300,6 +298,13 @@ bool discovery_is_atomic() const { return _discovery_is_atomic; } void set_atomic_discovery(bool atomic) { _discovery_is_atomic = atomic; } + // whether the JDK in which we are embedded is a pre-4965777 JDK, + // and thus whether or not it uses the discovered field to chain + // the entries in the pending list. + static bool pending_list_uses_discovered_field() { + return _pending_list_uses_discovered_field; + } + // whether discovery is done by multiple threads same-old-timeously bool discovery_is_mt() const { return _discovery_is_mt; } void set_mt_discovery(bool mt) { _discovery_is_mt = mt; } @@ -314,7 +319,6 @@ // iterate over oops void weak_oops_do(OopClosure* f); // weak roots - static void oops_do(OopClosure* f); // strong root(s) // Balance each of the discovered lists. void balance_all_queues(); @@ -340,7 +344,6 @@ // debugging void verify_no_references_recorded() PRODUCT_RETURN; void verify_referent(oop obj) PRODUCT_RETURN; - static void verify(); // clear the discovered lists (unlinking each entry). 
void clear_discovered_references() PRODUCT_RETURN; @@ -524,12 +527,10 @@ EnqueueTask(ReferenceProcessor& ref_processor, DiscoveredList refs_lists[], HeapWord* pending_list_addr, - oop sentinel_ref, int n_queues) : _ref_processor(ref_processor), _refs_lists(refs_lists), _pending_list_addr(pending_list_addr), - _sentinel_ref(sentinel_ref), _n_queues(n_queues) { } @@ -540,7 +541,6 @@ ReferenceProcessor& _ref_processor; DiscoveredList* _refs_lists; HeapWord* _pending_list_addr; - oop _sentinel_ref; int _n_queues; }; diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/memory/sharedHeap.cpp --- a/src/share/vm/memory/sharedHeap.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/memory/sharedHeap.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -146,7 +146,6 @@ assert(_strong_roots_parity != 0, "must have called prologue code"); if (!_process_strong_tasks->is_task_claimed(SH_PS_Universe_oops_do)) { Universe::oops_do(roots); - ReferenceProcessor::oops_do(roots); // Consider perm-gen discovered lists to be strong. perm_gen()->ref_processor()->weak_oops_do(roots); } diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/oops/instanceRefKlass.cpp --- a/src/share/vm/oops/instanceRefKlass.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/oops/instanceRefKlass.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -56,9 +56,8 @@ if (!oopDesc::is_null(heap_oop)) { oop referent = oopDesc::decode_heap_oop_not_null(heap_oop); if (!referent->is_gc_marked() && - MarkSweep::ref_processor()-> - discover_reference(obj, ref->reference_type())) { - // reference already enqueued, referent will be traversed later + MarkSweep::ref_processor()->discover_reference(obj, ref->reference_type())) { + // reference was discovered, referent will be traversed later ref->instanceKlass::oop_follow_contents(obj); debug_only( if(TraceReferenceGC && PrintGCDetails) { @@ -76,8 +75,34 @@ MarkSweep::mark_and_push(referent_addr); } } - // treat next as normal oop. next is a link in the pending list. 
T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj); + if (ReferenceProcessor::pending_list_uses_discovered_field()) { + // Treat discovered as normal oop, if ref is not "active", + // i.e. if next is non-NULL. + T next_oop = oopDesc::load_heap_oop(next_addr); + if (!oopDesc::is_null(next_oop)) { // i.e. ref is not "active" + T* discovered_addr = (T*)java_lang_ref_Reference::discovered_addr(obj); + debug_only( + if(TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print_cr(" Process discovered as normal " + INTPTR_FORMAT, discovered_addr); + } + ) + MarkSweep::mark_and_push(discovered_addr); + } + } else { +#ifdef ASSERT + // In the case of older JDKs which do not use the discovered + // field for the pending list, an inactive ref (next != NULL) + // must always have a NULL discovered field. + oop next = oopDesc::load_decode_heap_oop(next_addr); + oop discovered = java_lang_ref_Reference::discovered(obj); + assert(oopDesc::is_null(next) || oopDesc::is_null(discovered), + err_msg("Found an inactive reference " PTR_FORMAT " with a non-NULL discovered field", + obj)); +#endif + } + // treat next as normal oop. next is a link in the reference queue. debug_only( if(TraceReferenceGC && PrintGCDetails) { gclog_or_tty->print_cr(" Process next as normal " INTPTR_FORMAT, next_addr); @@ -130,13 +155,33 @@ PSParallelCompact::mark_and_push(cm, referent_addr); } } - // treat next as normal oop. next is a link in the pending list. T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj); - debug_only( - if(TraceReferenceGC && PrintGCDetails) { - gclog_or_tty->print_cr(" Process next as normal " INTPTR_FORMAT, next_addr); + if (ReferenceProcessor::pending_list_uses_discovered_field()) { + // Treat discovered as normal oop, if ref is not "active", + // i.e. if next is non-NULL. + T next_oop = oopDesc::load_heap_oop(next_addr); + if (!oopDesc::is_null(next_oop)) { // i.e. 
ref is not "active" + T* discovered_addr = (T*)java_lang_ref_Reference::discovered_addr(obj); + debug_only( + if(TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print_cr(" Process discovered as normal " + INTPTR_FORMAT, discovered_addr); + } + ) + PSParallelCompact::mark_and_push(cm, discovered_addr); } - ) + } else { +#ifdef ASSERT + // In the case of older JDKs which do not use the discovered + // field for the pending list, an inactive ref (next != NULL) + // must always have a NULL discovered field. + T next = oopDesc::load_heap_oop(next_addr); + oop discovered = java_lang_ref_Reference::discovered(obj); + assert(oopDesc::is_null(next) || oopDesc::is_null(discovered), + err_msg("Found an inactive reference " PTR_FORMAT " with a non-NULL discovered field", + obj)); +#endif + } PSParallelCompact::mark_and_push(cm, next_addr); ref->instanceKlass::oop_follow_contents(cm, obj); } @@ -197,27 +242,53 @@ } #define InstanceRefKlass_SPECIALIZED_OOP_ITERATE(T, nv_suffix, contains) \ + T* disc_addr = (T*)java_lang_ref_Reference::discovered_addr(obj); \ if (closure->apply_to_weak_ref_discovered_field()) { \ - T* disc_addr = (T*)java_lang_ref_Reference::discovered_addr(obj); \ closure->do_oop##nv_suffix(disc_addr); \ } \ \ T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj); \ T heap_oop = oopDesc::load_heap_oop(referent_addr); \ - if (!oopDesc::is_null(heap_oop) && contains(referent_addr)) { \ - ReferenceProcessor* rp = closure->_ref_processor; \ + ReferenceProcessor* rp = closure->_ref_processor; \ + if (!oopDesc::is_null(heap_oop)) { \ oop referent = oopDesc::decode_heap_oop_not_null(heap_oop); \ if (!referent->is_gc_marked() && (rp != NULL) && \ rp->discover_reference(obj, reference_type())) { \ return size; \ - } else { \ + } else if (contains(referent_addr)) { \ /* treat referent as normal oop */ \ SpecializationStats::record_do_oop_call##nv_suffix(SpecializationStats::irk);\ closure->do_oop##nv_suffix(referent_addr); \ } \ } \ + T* next_addr = 
(T*)java_lang_ref_Reference::next_addr(obj); \ + if (ReferenceProcessor::pending_list_uses_discovered_field()) { \ + T next_oop = oopDesc::load_heap_oop(next_addr); \ + /* Treat discovered as normal oop, if ref is not "active" (next non-NULL) */\ + if (!oopDesc::is_null(next_oop) && contains(disc_addr)) { \ + /* i.e. ref is not "active" */ \ + debug_only( \ + if(TraceReferenceGC && PrintGCDetails) { \ + gclog_or_tty->print_cr(" Process discovered as normal " \ + INTPTR_FORMAT, disc_addr); \ + } \ + ) \ + SpecializationStats::record_do_oop_call##nv_suffix(SpecializationStats::irk);\ + closure->do_oop##nv_suffix(disc_addr); \ + } \ + } else { \ + /* In the case of older JDKs which do not use the discovered field for */ \ + /* the pending list, an inactive ref (next != NULL) must always have a */ \ + /* NULL discovered field. */ \ + debug_only( \ + T next_oop = oopDesc::load_heap_oop(next_addr); \ + T disc_oop = oopDesc::load_heap_oop(disc_addr); \ + assert(oopDesc::is_null(next_oop) || oopDesc::is_null(disc_oop), \ + err_msg("Found an inactive reference " PTR_FORMAT " with a non-NULL" \ + "discovered field", obj)); \ + ) \ + } \ /* treat next as normal oop */ \ - T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj); \ if (contains(next_addr)) { \ SpecializationStats::record_do_oop_call##nv_suffix(SpecializationStats::irk); \ closure->do_oop##nv_suffix(next_addr); \ @@ -306,8 +377,37 @@ pm->claim_or_forward_depth(referent_addr); } } - // treat next as normal oop + // Treat discovered as normal oop, if ref is not "active", + // i.e. if next is non-NULL. T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj); + if (ReferenceProcessor::pending_list_uses_discovered_field()) { + T next_oop = oopDesc::load_heap_oop(next_addr); + if (!oopDesc::is_null(next_oop)) { // i.e. 
ref is not "active" + T* discovered_addr = (T*)java_lang_ref_Reference::discovered_addr(obj); + debug_only( + if(TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print_cr(" Process discovered as normal " + INTPTR_FORMAT, discovered_addr); + } + ) + if (PSScavenge::should_scavenge(discovered_addr)) { + pm->claim_or_forward_depth(discovered_addr); + } + } + } else { +#ifdef ASSERT + // In the case of older JDKs which do not use the discovered + // field for the pending list, an inactive ref (next != NULL) + // must always have a NULL discovered field. + oop next = oopDesc::load_decode_heap_oop(next_addr); + oop discovered = java_lang_ref_Reference::discovered(obj); + assert(oopDesc::is_null(next) || oopDesc::is_null(discovered), + err_msg("Found an inactive reference " PTR_FORMAT " with a non-NULL discovered field", + obj)); +#endif + } + + // Treat next as normal oop; next is a link in the reference queue. if (PSScavenge::should_scavenge(next_addr)) { pm->claim_or_forward_depth(next_addr); } diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/prims/jvm.h --- a/src/share/vm/prims/jvm.h Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/prims/jvm.h Fri Sep 09 12:44:37 2011 -0700 @@ -1579,7 +1579,8 @@ */ unsigned int thread_park_blocker : 1; unsigned int post_vm_init_hook_enabled : 1; - unsigned int : 30; + unsigned int pending_list_uses_discovered_field : 1; + unsigned int : 29; unsigned int : 32; unsigned int : 32; } jdk_version_info; diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/runtime/arguments.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -1423,6 +1423,9 @@ if (FLAG_IS_DEFAULT(MinHeapDeltaBytes)) { FLAG_SET_DEFAULT(MinHeapDeltaBytes, 64*M); } + // For those collectors or operating systems (eg, Windows) that do + // not support full UseNUMA, we will map to UseNUMAInterleaving for now + UseNUMAInterleaving = true; } } diff -r 8f47d8870d9a -r 5257f8e66b40 
src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/runtime/globals.hpp Fri Sep 09 12:44:37 2011 -0700 @@ -475,6 +475,12 @@ product(bool, UseNUMA, false, \ "Use NUMA if available") \ \ + product(bool, UseNUMAInterleaving, false, \ + "Interleave memory across NUMA nodes if available") \ + \ + product(uintx, NUMAInterleaveGranularity, 2*M, \ + "Granularity to use for NUMA interleaving on Windows OS") \ + \ product(bool, ForceNUMA, false, \ "Force NUMA optimizations on single-node/UMA systems") \ \ diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/runtime/java.cpp --- a/src/share/vm/runtime/java.cpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/runtime/java.cpp Fri Sep 09 12:44:37 2011 -0700 @@ -672,7 +672,8 @@ _current = JDK_Version(major, minor, micro, info.update_version, info.special_update_version, build, info.thread_park_blocker == 1, - info.post_vm_init_hook_enabled == 1); + info.post_vm_init_hook_enabled == 1, + info.pending_list_uses_discovered_field == 1); } } diff -r 8f47d8870d9a -r 5257f8e66b40 src/share/vm/runtime/java.hpp --- a/src/share/vm/runtime/java.hpp Thu Sep 08 09:35:41 2011 +0200 +++ b/src/share/vm/runtime/java.hpp Fri Sep 09 12:44:37 2011 -0700 @@ -92,6 +92,7 @@ bool _partially_initialized; bool _thread_park_blocker; + bool _pending_list_uses_discovered_field; bool _post_vm_init_hook_enabled; bool is_valid() const { @@ -114,15 +115,18 @@ JDK_Version() : _major(0), _minor(0), _micro(0), _update(0), _special(0), _build(0), _partially_initialized(false), - _thread_park_blocker(false), _post_vm_init_hook_enabled(false) {} + _thread_park_blocker(false), _post_vm_init_hook_enabled(false), + _pending_list_uses_discovered_field(false) {} JDK_Version(uint8_t major, uint8_t minor = 0, uint8_t micro = 0, uint8_t update = 0, uint8_t special = 0, uint8_t build = 0, - bool thread_park_blocker = false, bool post_vm_init_hook_enabled = false) : + bool thread_park_blocker = false, bool 
post_vm_init_hook_enabled = false, + bool pending_list_uses_discovered_field = false) : _major(major), _minor(minor), _micro(micro), _update(update), _special(special), _build(build), _partially_initialized(false), _thread_park_blocker(thread_park_blocker), - _post_vm_init_hook_enabled(post_vm_init_hook_enabled) {} + _post_vm_init_hook_enabled(post_vm_init_hook_enabled), + _pending_list_uses_discovered_field(pending_list_uses_discovered_field) {} // Returns the current running JDK version static JDK_Version current() { return _current; } @@ -149,6 +153,10 @@ bool post_vm_init_hook_enabled() const { return _post_vm_init_hook_enabled; } + // For compatibility wrt pre-4965777 JDK's + bool pending_list_uses_discovered_field() const { + return _pending_list_uses_discovered_field; + } // Performs a full ordering comparison using all fields (update, build, etc.) int compare(const JDK_Version& other) const;