# HG changeset patch
# User jcoomes
# Date 1319841376 25200
# Node ID 6534482ff68ad79066dfe15dfb6d8905f09681bd
# Parent  02fe430d493e72bfcdecf85849171a974ea08424
# Parent  8487c835efbfd2ff42f829726f46f7fc1ea909b5
Merge

diff -r 02fe430d493e -r 6534482ff68a make/hotspot_version
--- a/make/hotspot_version Thu Oct 27 13:54:31 2011 -0700
+++ b/make/hotspot_version Fri Oct 28 15:36:16 2011 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=23
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=03
+HS_BUILD_NUMBER=04
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -62,7 +62,7 @@
   MinChunkSize = numQuanta(sizeof(FreeChunk), MinObjAlignmentInBytes) *
                  MinObjAlignment;
   assert(IndexSetStart == 0 && IndexSetStride == 0, "already set");
-  IndexSetStart  = MinObjAlignment;
+  IndexSetStart  = (int) MinChunkSize;
   IndexSetStride = MinObjAlignment;
 }
 
@@ -138,7 +138,7 @@
   } else {
     _fitStrategy = FreeBlockStrategyNone;
   }
-  checkFreeListConsistency();
+  check_free_list_consistency();
 
   // Initialize locks for parallel case.
 
@@ -1358,17 +1358,29 @@
   ShouldNotReachHere();
 }
 
-bool CompactibleFreeListSpace::verifyChunkInIndexedFreeLists(FreeChunk* fc)
-  const {
+bool CompactibleFreeListSpace::verifyChunkInIndexedFreeLists(FreeChunk* fc) const {
   assert(fc->size() < IndexSetSize, "Size of chunk is too large");
   return _indexedFreeList[fc->size()].verifyChunkInFreeLists(fc);
 }
 
+bool CompactibleFreeListSpace::verify_chunk_is_linear_alloc_block(FreeChunk* fc) const {
+  assert((_smallLinearAllocBlock._ptr != (HeapWord*)fc) ||
+         (_smallLinearAllocBlock._word_size == fc->size()),
+         "Linear allocation block shows incorrect size");
+  return ((_smallLinearAllocBlock._ptr == (HeapWord*)fc) &&
+          (_smallLinearAllocBlock._word_size == fc->size()));
+}
+
+// Check if the purported free chunk is present either as a linear
+// allocation block, the size-indexed table of (smaller) free blocks,
+// or the larger free blocks kept in the binary tree dictionary.
 bool CompactibleFreeListSpace::verifyChunkInFreeLists(FreeChunk* fc) const {
-  if (fc->size() >= IndexSetSize) {
+  if (verify_chunk_is_linear_alloc_block(fc)) {
+    return true;
+  } else if (fc->size() < IndexSetSize) {
+    return verifyChunkInIndexedFreeLists(fc);
+  } else {
     return dictionary()->verifyChunkInFreeLists(fc);
-  } else {
-    return verifyChunkInIndexedFreeLists(fc);
   }
 }
 
@@ -2495,7 +2507,8 @@
   FreeChunk* tail = _indexedFreeList[size].tail();
   size_t num = _indexedFreeList[size].count();
   size_t n = 0;
-  guarantee((size % 2 == 0) || fc == NULL, "Odd slots should be empty");
+  guarantee(((size >= MinChunkSize) && (size % IndexSetStride == 0)) || fc == NULL,
+            "Slot should have been empty");
   for (; fc != NULL; fc = fc->next(), n++) {
     guarantee(fc->size() == size, "Size inconsistency");
     guarantee(fc->isFree(), "!free?");
@@ -2506,14 +2519,14 @@
 }
 
 #ifndef PRODUCT
-void CompactibleFreeListSpace::checkFreeListConsistency() const {
+void CompactibleFreeListSpace::check_free_list_consistency() const {
   assert(_dictionary->minSize() <= IndexSetSize,
          "Some sizes can't be allocated without recourse to"
          " linear allocation buffers");
   assert(MIN_TREE_CHUNK_SIZE*HeapWordSize == sizeof(TreeChunk),
         "else MIN_TREE_CHUNK_SIZE is wrong");
-  assert((IndexSetStride == 2 && IndexSetStart == 2) ||
-         (IndexSetStride == 1 && IndexSetStart == 1), "just checking");
+  assert((IndexSetStride == 2 && IndexSetStart == 4) ||                   // 32-bit
+         (IndexSetStride == 1 && IndexSetStart == 3), "just checking");   // 64-bit
   assert((IndexSetStride != 2) || (MinChunkSize % 2 == 0),
         "Some for-loops may be incorrectly initialized");
   assert((IndexSetStride != 2) || (IndexSetSize % 2 == 1),
@@ -2688,33 +2701,27 @@
   }
 }
 
+// If this is changed in the future to allow parallel
+// access, one would need to take the FL locks and,
+// depending on how it is used, stagger access from
+// parallel threads to reduce contention.
 void CFLS_LAB::retire(int tid) {
   // We run this single threaded with the world stopped;
   // so no need for locks and such.
-#define CFLS_LAB_PARALLEL_ACCESS 0
   NOT_PRODUCT(Thread* t = Thread::current();)
   assert(Thread::current()->is_VM_thread(), "Error");
-  assert(CompactibleFreeListSpace::IndexSetStart == CompactibleFreeListSpace::IndexSetStride,
-         "Will access to uninitialized slot below");
-#if CFLS_LAB_PARALLEL_ACCESS
-  for (size_t i = CompactibleFreeListSpace::IndexSetSize - 1;
-       i > 0;
-       i -= CompactibleFreeListSpace::IndexSetStride) {
-#else // CFLS_LAB_PARALLEL_ACCESS
   for (size_t i = CompactibleFreeListSpace::IndexSetStart;
       i < CompactibleFreeListSpace::IndexSetSize;
       i += CompactibleFreeListSpace::IndexSetStride) {
-#endif // !CFLS_LAB_PARALLEL_ACCESS
    assert(_num_blocks[i] >= (size_t)_indexedFreeList[i].count(),
           "Can't retire more than what we obtained");
    if (_num_blocks[i] > 0) {
      size_t num_retire = _indexedFreeList[i].count();
      assert(_num_blocks[i] > num_retire, "Should have used at least one");
      {
-#if CFLS_LAB_PARALLEL_ACCESS
-        MutexLockerEx x(_cfls->_indexedFreeListParLocks[i],
-                        Mutex::_no_safepoint_check_flag);
-#endif // CFLS_LAB_PARALLEL_ACCESS
+        // MutexLockerEx x(_cfls->_indexedFreeListParLocks[i],
+        //                 Mutex::_no_safepoint_check_flag);
+
        // Update globals stats for num_blocks used
        _global_num_blocks[i] += (_num_blocks[i] - num_retire);
        _global_num_workers[i]++;
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp Fri Oct 28 15:36:16 2011 -0700
@@ -502,10 +502,14 @@
   void verifyFreeLists()             const PRODUCT_RETURN;
   void verifyIndexedFreeLists()      const;
   void verifyIndexedFreeList(size_t size) const;
-  // verify that the given chunk is in the free lists.
+  // Verify that the given chunk is in the free lists:
+  // i.e. either the binary tree dictionary, the indexed free lists
+  // or the linear allocation block.
   bool verifyChunkInFreeLists(FreeChunk* fc) const;
+  // Verify that the given chunk is the linear allocation block
+  bool verify_chunk_is_linear_alloc_block(FreeChunk* fc) const;
   // Do some basic checks on the the free lists.
-  void checkFreeListConsistency()    const PRODUCT_RETURN;
+  void check_free_list_consistency() const PRODUCT_RETURN;
 
   // Printing support
   void dump_at_safepoint_with_locks(CMSCollector* c, outputStream* st);
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -147,12 +147,8 @@
       }
     }
   } while (cm()->restart_for_overflow());
+
   double counting_start_time = os::elapsedVTime();
-
-  // YSR: These look dubious (i.e. redundant) !!! FIX ME
-  slt()->manipulatePLL(SurrogateLockerThread::acquirePLL);
-  slt()->manipulatePLL(SurrogateLockerThread::releaseAndNotifyPLL);
-
   if (!cm()->has_aborted()) {
     double count_start_sec = os::elapsedTime();
     if (PrintGC) {
@@ -175,6 +171,7 @@
       }
     }
   }
+
   double end_time = os::elapsedVTime();
   _vtime_count_accum += (end_time - counting_start_time);
   // Update the total virtual time before doing this, since it will try
@@ -335,13 +332,15 @@
   clear_started();
 }
 
-// Note: this method, although exported by the ConcurrentMarkSweepThread,
-// which is a non-JavaThread, can only be called by a JavaThread.
-// Currently this is done at vm creation time (post-vm-init) by the
-// main/Primordial (Java)Thread.
-// XXX Consider changing this in the future to allow the CMS thread
+// Note: As is the case with CMS - this method, although exported
+// by the ConcurrentMarkThread, which is a non-JavaThread, can only
+// be called by a JavaThread. Currently this is done at vm creation
+// time (post-vm-init) by the main/Primordial (Java)Thread.
+// XXX Consider changing this in the future to allow the CM thread
 // itself to create this thread?
 void ConcurrentMarkThread::makeSurrogateLockerThread(TRAPS) {
+  assert(UseG1GC, "SLT thread needed only for concurrent GC");
+  assert(THREAD->is_Java_thread(), "must be a Java thread");
   assert(_slt == NULL, "SLT already created");
   _slt = SurrogateLockerThread::make(THREAD);
 }
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -5502,34 +5502,36 @@
   CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set());
   double start = os::elapsedTime();
 
-  // Iterate over the dirty cards region list.
-  G1ParCleanupCTTask cleanup_task(ct_bs, this);
-
-  if (ParallelGCThreads > 0) {
-    set_par_threads(workers()->total_workers());
-    workers()->run_task(&cleanup_task);
-    set_par_threads(0);
-  } else {
-    while (_dirty_cards_region_list) {
-      HeapRegion* r = _dirty_cards_region_list;
-      cleanup_task.clear_cards(r);
-      _dirty_cards_region_list = r->get_next_dirty_cards_region();
-      if (_dirty_cards_region_list == r) {
-        // The last region.
-        _dirty_cards_region_list = NULL;
+  {
+    // Iterate over the dirty cards region list.
+    G1ParCleanupCTTask cleanup_task(ct_bs, this);
+
+    if (ParallelGCThreads > 0) {
+      set_par_threads(workers()->total_workers());
+      workers()->run_task(&cleanup_task);
+      set_par_threads(0);
+    } else {
+      while (_dirty_cards_region_list) {
+        HeapRegion* r = _dirty_cards_region_list;
+        cleanup_task.clear_cards(r);
+        _dirty_cards_region_list = r->get_next_dirty_cards_region();
+        if (_dirty_cards_region_list == r) {
+          // The last region.
+          _dirty_cards_region_list = NULL;
+        }
+        r->set_next_dirty_cards_region(NULL);
       }
-      r->set_next_dirty_cards_region(NULL);
     }
+#ifndef PRODUCT
+    if (G1VerifyCTCleanup || VerifyAfterGC) {
+      G1VerifyCardTableCleanup cleanup_verifier(this, ct_bs);
+      heap_region_iterate(&cleanup_verifier);
+    }
+#endif
   }
 
   double elapsed = os::elapsedTime() - start;
   g1_policy()->record_clear_ct_time(elapsed * 1000.0);
-#ifndef PRODUCT
-  if (G1VerifyCTCleanup || VerifyAfterGC) {
-    G1VerifyCardTableCleanup cleanup_verifier(this, ct_bs);
-    heap_region_iterate(&cleanup_verifier);
-  }
-#endif
 }
 
 void G1CollectedHeap::free_collection_set(HeapRegion* cs_head) {
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -320,6 +320,7 @@
   _par_last_termination_attempts = new double[_parallel_gc_threads];
   _par_last_gc_worker_end_times_ms = new double[_parallel_gc_threads];
   _par_last_gc_worker_times_ms = new double[_parallel_gc_threads];
+  _par_last_gc_worker_other_times_ms = new double[_parallel_gc_threads];
 
   // start conservatively
   _expensive_region_limit_ms = 0.5 * (double) MaxGCPauseMillis;
@@ -497,7 +498,6 @@
   initialize_gc_policy_counters();
 
   G1YoungGenSizer sizer;
-  size_t initial_region_num = sizer.initial_young_region_num();
   _min_desired_young_length = sizer.min_young_region_num();
   _max_desired_young_length = sizer.max_young_region_num();
 
@@ -511,17 +511,14 @@
     }
   }
 
-  // GenCollectorPolicy guarantees that min <= initial <= max.
-  // Asserting here just to state that we rely on this property.
   assert(_min_desired_young_length <= _max_desired_young_length, "Invalid min/max young gen size values");
-  assert(initial_region_num <= _max_desired_young_length, "Initial young gen size too large");
-  assert(_min_desired_young_length <= initial_region_num, "Initial young gen size too small");
 
   set_adaptive_young_list_length(_min_desired_young_length < _max_desired_young_length);
   if (adaptive_young_list_length()) {
     _young_list_fixed_length = 0;
   } else {
-    _young_list_fixed_length = initial_region_num;
+    assert(_min_desired_young_length == _max_desired_young_length, "Min and max young size differ");
+    _young_list_fixed_length = _min_desired_young_length;
   }
   _free_regions_at_end_of_collection = _g1->free_regions();
   update_young_list_target_length();
@@ -976,6 +973,7 @@
     _par_last_termination_attempts[i] = -1234.0;
     _par_last_gc_worker_end_times_ms[i] = -1234.0;
     _par_last_gc_worker_times_ms[i] = -1234.0;
+    _par_last_gc_worker_other_times_ms[i] = -1234.0;
   }
 #endif
 
@@ -984,8 +982,10 @@
     _cur_aux_times_set[i] = false;
   }
 
-  _satb_drain_time_set = false;
-  _last_satb_drain_processed_buffers = -1;
+  // These are initialized to zero here and they are set during
+  // the evacuation pause if marking is in progress.
+  _cur_satb_drain_time_ms = 0.0;
+  _last_satb_drain_processed_buffers = 0;
 
   _last_young_gc_full = false;
 
@@ -1097,61 +1097,65 @@
                 (int)total, (int)avg, (int)min, (int)max, (int)max - (int)min);
 }
 
-void G1CollectorPolicy::print_stats (int level,
-                                     const char* str,
-                                     double value) {
+void G1CollectorPolicy::print_stats(int level,
+                                    const char* str,
+                                    double value) {
   LineBuffer(level).append_and_print_cr("[%s: %5.1lf ms]", str, value);
 }
 
-void G1CollectorPolicy::print_stats (int level,
-                                     const char* str,
-                                     int value) {
+void G1CollectorPolicy::print_stats(int level,
+                                    const char* str,
+                                    int value) {
   LineBuffer(level).append_and_print_cr("[%s: %d]", str, value);
 }
 
-double G1CollectorPolicy::avg_value (double* data) {
+double G1CollectorPolicy::avg_value(double* data) {
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     double ret = 0.0;
-    for (uint i = 0; i < ParallelGCThreads; ++i)
+    for (uint i = 0; i < ParallelGCThreads; ++i) {
       ret += data[i];
+    }
     return ret / (double) ParallelGCThreads;
   } else {
     return data[0];
   }
 }
 
-double G1CollectorPolicy::max_value (double* data) {
+double G1CollectorPolicy::max_value(double* data) {
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     double ret = data[0];
-    for (uint i = 1; i < ParallelGCThreads; ++i)
-      if (data[i] > ret)
+    for (uint i = 1; i < ParallelGCThreads; ++i) {
+      if (data[i] > ret) {
         ret = data[i];
+      }
+    }
    return ret;
   } else {
     return data[0];
   }
 }
 
-double G1CollectorPolicy::sum_of_values (double* data) {
+double G1CollectorPolicy::sum_of_values(double* data) {
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     double sum = 0.0;
-    for (uint i = 0; i < ParallelGCThreads; i++)
+    for (uint i = 0; i < ParallelGCThreads; i++) {
       sum += data[i];
+    }
     return sum;
   } else {
     return data[0];
   }
 }
 
-double G1CollectorPolicy::max_sum (double* data1,
-                                   double* data2) {
+double G1CollectorPolicy::max_sum(double* data1, double* data2) {
   double ret = data1[0] + data2[0];
 
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     for (uint i = 1; i < ParallelGCThreads; ++i) {
       double data = data1[i] + data2[i];
-      if (data > ret)
+      if (data > ret) {
         ret = data;
+      }
     }
   }
   return ret;
@@ -1251,6 +1255,10 @@
 
   _n_pauses++;
 
+  // These values are used to update the summary information that is
+  // displayed when TraceGen0Time is enabled, and are output as part
+  // of the PrintGCDetails output, in the non-parallel case.
+
   double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
   double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms);
   double update_rs_time = avg_value(_par_last_update_rs_times_ms);
@@ -1260,42 +1268,68 @@
   double obj_copy_time = avg_value(_par_last_obj_copy_times_ms);
   double termination_time = avg_value(_par_last_termination_times_ms);
 
-  double parallel_known_time = update_rs_time +
-                               ext_root_scan_time +
-                               mark_stack_scan_time +
-                               scan_rs_time +
-                               obj_copy_time +
-                               termination_time;
-
-  double parallel_other_time = _cur_collection_par_time_ms - parallel_known_time;
-
-  PauseSummary* summary = _summary;
+  double known_time = ext_root_scan_time +
+                      mark_stack_scan_time +
+                      update_rs_time +
+                      scan_rs_time +
+                      obj_copy_time;
+
+  double other_time_ms = elapsed_ms;
+
+  // Subtract the SATB drain time. It's initialized to zero at the
+  // start of the pause and is updated during the pause if marking
+  // is in progress.
+  other_time_ms -= _cur_satb_drain_time_ms;
+
+  if (parallel) {
+    other_time_ms -= _cur_collection_par_time_ms;
+  } else {
+    other_time_ms -= known_time;
+  }
+
+  // Subtract the time taken to clean the card table from the
+  // current value of "other time"
+  other_time_ms -= _cur_clear_ct_time_ms;
+
+  // TraceGen0Time and TraceGen1Time summary info updating.
+  _all_pause_times_ms->add(elapsed_ms);
 
   if (update_stats) {
     _recent_rs_scan_times_ms->add(scan_rs_time);
     _recent_pause_times_ms->add(elapsed_ms);
     _recent_rs_sizes->add(rs_size);
 
-    MainBodySummary* body_summary = summary->main_body_summary();
-    guarantee(body_summary != NULL, "should not be null!");
-
-    if (_satb_drain_time_set)
-      body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
-    else
-      body_summary->record_satb_drain_time_ms(0.0);
+    _summary->record_total_time_ms(elapsed_ms);
+    _summary->record_other_time_ms(other_time_ms);
+
+    MainBodySummary* body_summary = _summary->main_body_summary();
+    assert(body_summary != NULL, "should not be null!");
+
+    // This will be non-zero iff marking is currently in progress (i.e.
+    // _g1->mark_in_progress() == true) and the current pause was not
+    // an initial mark pause. Since the body_summary items are NumberSeqs,
+    // however, they have to be consistent and updated in lock-step with
+    // each other. Therefore we unconditionally record the SATB drain
+    // time - even if it's zero.
+    body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
     body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
     body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time);
     body_summary->record_update_rs_time_ms(update_rs_time);
     body_summary->record_scan_rs_time_ms(scan_rs_time);
     body_summary->record_obj_copy_time_ms(obj_copy_time);
+
     if (parallel) {
       body_summary->record_parallel_time_ms(_cur_collection_par_time_ms);
-      body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms);
       body_summary->record_termination_time_ms(termination_time);
+
+      double parallel_known_time = known_time + termination_time;
+      double parallel_other_time = _cur_collection_par_time_ms - parallel_known_time;
       body_summary->record_parallel_other_time_ms(parallel_other_time);
     }
+    body_summary->record_mark_closure_time_ms(_mark_closure_time_ms);
+    body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms);
 
     // We exempt parallel collection from this check because Alloc Buffer
     // fragmentation can produce negative collections.  Same with evac
@@ -1307,6 +1341,7 @@
            || _g1->evacuation_failed()
            || surviving_bytes <= _collection_set_bytes_used_before,
            "Or else negative collection!");
+
     _recent_CS_bytes_used_before->add(_collection_set_bytes_used_before);
     _recent_CS_bytes_surviving->add(surviving_bytes);
 
@@ -1357,6 +1392,13 @@
     }
   }
 
+  for (int i = 0; i < _aux_num; ++i) {
+    if (_cur_aux_times_set[i]) {
+      _all_aux_times_ms[i].add(_cur_aux_times_ms[i]);
+    }
+  }
+
+
   if (G1PolicyVerbose > 1) {
     gclog_or_tty->print_cr("   Recording collection pause(%d)", _n_pauses);
   }
@@ -1383,61 +1425,60 @@
                            recent_avg_pause_time_ratio() * 100.0);
   }
 
-  double other_time_ms = elapsed_ms;
-
-  if (_satb_drain_time_set) {
-    other_time_ms -= _cur_satb_drain_time_ms;
-  }
-
-  if (parallel) {
-    other_time_ms -= _cur_collection_par_time_ms + _cur_clear_ct_time_ms;
-  } else {
-    other_time_ms -=
-      update_rs_time +
-      ext_root_scan_time + mark_stack_scan_time +
-      scan_rs_time + obj_copy_time;
-  }
-
+  // PrintGCDetails output
   if (PrintGCDetails) {
+    bool print_marking_info =
+      _g1->mark_in_progress() && !last_pause_included_initial_mark;
+
     gclog_or_tty->print_cr("%s, %1.8lf secs]",
                            (last_pause_included_initial_mark) ? " (initial-mark)" : "",
                            elapsed_ms / 1000.0);
 
-    if (_satb_drain_time_set) {
+    if (print_marking_info) {
       print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms);
-    }
-    if (_last_satb_drain_processed_buffers >= 0) {
       print_stats(2, "Processed Buffers", _last_satb_drain_processed_buffers);
     }
+
     if (parallel) {
       print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
-      print_par_stats(2, "GC Worker Start Time", _par_last_gc_worker_start_times_ms);
+      print_par_stats(2, "GC Worker Start", _par_last_gc_worker_start_times_ms);
+      print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
+      if (print_marking_info) {
+        print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
+      }
       print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
       print_par_sizes(3, "Processed Buffers", _par_last_update_rs_processed_buffers);
-      print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
-      print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
       print_par_stats(2, "Scan RS", _par_last_scan_rs_times_ms);
       print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms);
       print_par_stats(2, "Termination", _par_last_termination_times_ms);
       print_par_sizes(3, "Termination Attempts", _par_last_termination_attempts);
-      print_par_stats(2, "GC Worker End Time", _par_last_gc_worker_end_times_ms);
+      print_par_stats(2, "GC Worker End", _par_last_gc_worker_end_times_ms);
 
       for (int i = 0; i < _parallel_gc_threads; i++) {
         _par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] - _par_last_gc_worker_start_times_ms[i];
+
+        double worker_known_time = _par_last_ext_root_scan_times_ms[i] +
+                                   _par_last_mark_stack_scan_times_ms[i] +
+                                   _par_last_update_rs_times_ms[i] +
+                                   _par_last_scan_rs_times_ms[i] +
+                                   _par_last_obj_copy_times_ms[i] +
+                                   _par_last_termination_times_ms[i];
+
+        _par_last_gc_worker_other_times_ms[i] = _cur_collection_par_time_ms - worker_known_time;
       }
-      print_par_stats(2, "GC Worker Times", _par_last_gc_worker_times_ms);
-
-      print_stats(2, "Parallel Other", parallel_other_time);
-      print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
+      print_par_stats(2, "GC Worker", _par_last_gc_worker_times_ms);
+      print_par_stats(2, "GC Worker Other", _par_last_gc_worker_other_times_ms);
     } else {
+      print_stats(1, "Ext Root Scanning", ext_root_scan_time);
+      if (print_marking_info) {
+        print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
+      }
       print_stats(1, "Update RS", update_rs_time);
-      print_stats(2, "Processed Buffers",
-                  (int)update_rs_processed_buffers);
-      print_stats(1, "Ext Root Scanning", ext_root_scan_time);
-      print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
+      print_stats(2, "Processed Buffers", (int)update_rs_processed_buffers);
       print_stats(1, "Scan RS", scan_rs_time);
       print_stats(1, "Object Copying", obj_copy_time);
     }
+    print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
 #ifndef PRODUCT
     print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms);
     print_stats(1, "Cum Clear CC", _cum_clear_cc_time_ms);
@@ -1461,16 +1502,6 @@
     }
   }
 
-  _all_pause_times_ms->add(elapsed_ms);
-  if (update_stats) {
-    summary->record_total_time_ms(elapsed_ms);
-    summary->record_other_time_ms(other_time_ms);
-  }
-  for (int i = 0; i < _aux_num; ++i)
-    if (_cur_aux_times_set[i]) {
-      _all_aux_times_ms[i].add(_cur_aux_times_ms[i]);
-    }
-
   // Update the efficiency-since-mark vars.
   double proc_ms = elapsed_ms * (double) _parallel_gc_threads;
   if (elapsed_ms < MIN_TIMER_GRANULARITY) {
@@ -2138,17 +2169,17 @@
   _g1->collection_set_iterate(&cs_closure);
 }
 
-void G1CollectorPolicy::print_summary (int level,
-                                       const char* str,
-                                       NumberSeq* seq) const {
+void G1CollectorPolicy::print_summary(int level,
+                                      const char* str,
+                                      NumberSeq* seq) const {
   double sum = seq->sum();
   LineBuffer(level + 1).append_and_print_cr("%-24s = %8.2lf s (avg = %8.2lf ms)",
                 str, sum / 1000.0, seq->avg());
 }
 
-void G1CollectorPolicy::print_summary_sd (int level,
-                                          const char* str,
-                                          NumberSeq* seq) const {
+void G1CollectorPolicy::print_summary_sd(int level,
+                                         const char* str,
+                                         NumberSeq* seq) const {
   print_summary(level, str, seq);
   LineBuffer(level + 6).append_and_print_cr("(num = %5d, std dev = %8.2lf ms, max = %8.2lf ms)",
                 seq->num(), seq->sd(), seq->maximum());
@@ -2211,20 +2242,18 @@
         print_summary(1, "SATB Drain", body_summary->get_satb_drain_seq());
         if (parallel) {
           print_summary(1, "Parallel Time", body_summary->get_parallel_seq());
+          print_summary(2, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
+          print_summary(2, "Mark Stack Scanning", body_summary->get_mark_stack_scan_seq());
           print_summary(2, "Update RS", body_summary->get_update_rs_seq());
-          print_summary(2, "Ext Root Scanning",
-                        body_summary->get_ext_root_scan_seq());
-          print_summary(2, "Mark Stack Scanning",
-                        body_summary->get_mark_stack_scan_seq());
           print_summary(2, "Scan RS", body_summary->get_scan_rs_seq());
           print_summary(2, "Object Copy", body_summary->get_obj_copy_seq());
           print_summary(2, "Termination", body_summary->get_termination_seq());
-          print_summary(2, "Other", body_summary->get_parallel_other_seq());
+          print_summary(2, "Parallel Other", body_summary->get_parallel_other_seq());
           {
             NumberSeq* other_parts[] = {
-              body_summary->get_update_rs_seq(),
               body_summary->get_ext_root_scan_seq(),
               body_summary->get_mark_stack_scan_seq(),
+              body_summary->get_update_rs_seq(),
               body_summary->get_scan_rs_seq(),
               body_summary->get_obj_copy_seq(),
               body_summary->get_termination_seq()
@@ -2234,18 +2263,16 @@
             check_other_times(2, body_summary->get_parallel_other_seq(),
                               &calc_other_times_ms);
           }
-          print_summary(1, "Mark Closure", body_summary->get_mark_closure_seq());
-          print_summary(1, "Clear CT", body_summary->get_clear_ct_seq());
         } else {
+          print_summary(1, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
+          print_summary(1, "Mark Stack Scanning", body_summary->get_mark_stack_scan_seq());
           print_summary(1, "Update RS", body_summary->get_update_rs_seq());
-          print_summary(1, "Ext Root Scanning",
-                        body_summary->get_ext_root_scan_seq());
-          print_summary(1, "Mark Stack Scanning",
-                        body_summary->get_mark_stack_scan_seq());
           print_summary(1, "Scan RS", body_summary->get_scan_rs_seq());
           print_summary(1, "Object Copy", body_summary->get_obj_copy_seq());
         }
       }
+      print_summary(1, "Mark Closure", body_summary->get_mark_closure_seq());
+      print_summary(1, "Clear CT", body_summary->get_clear_ct_seq());
       print_summary(1, "Other", summary->get_other_seq());
       {
         if (body_summary != NULL) {
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Fri Oct 28 15:36:16 2011 -0700
@@ -74,7 +74,7 @@
   define_num_seq(termination) // parallel only
   define_num_seq(parallel_other) // parallel only
   define_num_seq(mark_closure)
-  define_num_seq(clear_ct)  // parallel only
+  define_num_seq(clear_ct)
 };
 
 class Summary: public PauseSummary,
@@ -115,7 +115,6 @@
   double _cur_collection_par_time_ms;
   double _cur_satb_drain_time_ms;
   double _cur_clear_ct_time_ms;
-  bool   _satb_drain_time_set;
   double _cur_ref_proc_time_ms;
   double _cur_ref_enq_time_ms;
 
@@ -176,6 +175,11 @@
   double* _par_last_gc_worker_end_times_ms;
   double* _par_last_gc_worker_times_ms;
 
+  // Each worker's 'other' time, i.e. the elapsed time of the parallel
+  // phase of the pause minus the sum of the individual sub-phase
+  // times for a given worker thread.
+  double* _par_last_gc_worker_other_times_ms;
+
   // indicates whether we are in full young or partially young GC mode
   bool _full_young_gcs;
 
@@ -892,11 +896,12 @@
   }
 
   void record_satb_drain_time(double ms) {
+    assert(_g1->mark_in_progress(), "shouldn't be here otherwise");
     _cur_satb_drain_time_ms = ms;
-    _satb_drain_time_set    = true;
   }
 
-  void record_satb_drain_processed_buffers (int processed_buffers) {
+  void record_satb_drain_processed_buffers(int processed_buffers) {
+    assert(_g1->mark_in_progress(), "shouldn't be here otherwise");
     _last_satb_drain_processed_buffers = processed_buffers;
   }
 
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/g1RemSet.cpp
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -122,10 +122,10 @@
   void set_try_claimed() { _try_claimed = true; }
 
   void scanCard(size_t index, HeapRegion *r) {
-    DirtyCardToOopClosure* cl =
-      r->new_dcto_closure(_oc,
-                          CardTableModRefBS::Precise,
-                          HeapRegionDCTOC::IntoCSFilterKind);
+    // Stack allocate the DirtyCardToOopClosure instance
+    HeapRegionDCTOC cl(_g1h, r, _oc,
+                       CardTableModRefBS::Precise,
+                       HeapRegionDCTOC::IntoCSFilterKind);
 
     // Set the "from" region in the closure.
     _oc->set_region(r);
@@ -140,7 +140,7 @@
     // scans (the rsets of the regions in the cset can intersect).
     _ct_bs->set_card_claimed(index);
     _cards_done++;
-    cl->do_MemRegion(mr);
+    cl.do_MemRegion(mr);
   }
 }
 
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/heapRegion.cpp
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -340,14 +340,6 @@
   init_top_at_mark_start();
 }
 
-DirtyCardToOopClosure*
-HeapRegion::new_dcto_closure(OopClosure* cl,
-                             CardTableModRefBS::PrecisionStyle precision,
-                             HeapRegionDCTOC::FilterKind fk) {
-  return new HeapRegionDCTOC(G1CollectedHeap::heap(),
-                             this, cl, precision, fk);
-}
-
 void HeapRegion::hr_clear(bool par, bool clear_space) {
   assert(_humongous_type == NotHumongous,
          "we should have already filtered out humongous regions");
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/heapRegion.hpp
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp Fri Oct 28 15:36:16 2011 -0700
@@ -431,6 +431,14 @@
     return _humongous_start_region;
   }
 
+  // Same as Space::is_in_reserved, but will use the original size of the region.
+  // The original size is different only for start humongous regions. They get
+  // their _end set up to be the end of the last continues region of the
+  // corresponding humongous object.
+  bool is_in_reserved_raw(const void* p) const {
+    return _bottom <= p && p < _orig_end;
+  }
+
   // Makes the current region be a "starts humongous" region, i.e.,
   // the first region in a series of one or more contiguous regions
   // that will contain a single "humongous" object. The two parameters
@@ -569,11 +577,6 @@
   // allocated in the current region before the last call to "save_mark".
   void oop_before_save_marks_iterate(OopClosure* cl);
 
-  DirtyCardToOopClosure*
-  new_dcto_closure(OopClosure* cl,
-                   CardTableModRefBS::PrecisionStyle precision,
-                   HeapRegionDCTOC::FilterKind fk);
-
   // Note the start or end of marking. This tells the heap region
   // that the collector is about to start or has finished (concurrently)
   // marking the heap.
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -143,7 +143,11 @@
     // If the test below fails, then this table was reused concurrently
     // with this operation.  This is OK, since the old table was coarsened,
     // and adding a bit to the new table is never incorrect.
-    if (loc_hr->is_in_reserved(from)) {
+    // If the table used to belong to a continues humongous region and is
+    // now reused for the corresponding start humongous region, we need to
+    // make sure that we detect this. Thus, we call is_in_reserved_raw()
+    // instead of just is_in_reserved() here.
+    if (loc_hr->is_in_reserved_raw(from)) {
       size_t hw_offset = pointer_delta((HeapWord*)from, loc_hr->bottom());
       CardIdx_t from_card = (CardIdx_t) hw_offset >> (CardTableModRefBS::card_shift - LogHeapWordSize);
 
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/vm_operations_g1.cpp
--- a/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
 #include "gc_implementation/g1/vm_operations_g1.hpp"
@@ -165,6 +166,20 @@
   }
 }
 
+void VM_CGC_Operation::acquire_pending_list_lock() {
+  // The caller may block while communicating
+  // with the SLT thread in order to acquire/release the PLL.
+  ConcurrentMarkThread::slt()->
+    manipulatePLL(SurrogateLockerThread::acquirePLL);
+}
+
+void VM_CGC_Operation::release_and_notify_pending_list_lock() {
+  // The caller may block while communicating
+  // with the SLT thread in order to acquire/release the PLL.
+  ConcurrentMarkThread::slt()->
+    manipulatePLL(SurrogateLockerThread::releaseAndNotifyPLL);
+}
+
 void VM_CGC_Operation::doit() {
   gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
   TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
@@ -180,12 +195,19 @@
 }
 
 bool VM_CGC_Operation::doit_prologue() {
+  // Note the relative order of the locks must match that in
+  // VM_GC_Operation::doit_prologue() or deadlocks can occur
+  acquire_pending_list_lock();
+
   Heap_lock->lock();
   SharedHeap::heap()->_thread_holds_heap_lock_for_gc = true;
   return true;
 }
 
 void VM_CGC_Operation::doit_epilogue() {
+  // Note the relative order of the unlocks must match that in
+  // VM_GC_Operation::doit_epilogue()
   SharedHeap::heap()->_thread_holds_heap_lock_for_gc = false;
   Heap_lock->unlock();
+  release_and_notify_pending_list_lock();
 }
 
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/vm_operations_g1.hpp
--- a/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp Fri Oct 28 15:36:16 2011 -0700
@@ -93,11 +93,17 @@
   }
 };
 
-// Concurrent GC stop-the-world operations such as initial and final mark;
+// Concurrent GC stop-the-world operations such as remark and cleanup;
 // consider sharing these with CMS's counterparts.
 class VM_CGC_Operation: public VM_Operation {
   VoidClosure* _cl;
   const char*  _printGCMessage;
+
+protected:
+  // java.lang.ref.Reference support
+  void acquire_pending_list_lock();
+  void release_and_notify_pending_list_lock();
+
 public:
   VM_CGC_Operation(VoidClosure* cl, const char *printGCMsg)
     : _cl(cl), _printGCMessage(printGCMsg) { }
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/shared/concurrentGCThread.cpp
--- a/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -224,6 +224,8 @@
   MutexLockerEx x(&_monitor, Mutex::_no_safepoint_check_flag);
   assert(_buffer == empty, "Should be empty");
   assert(msg != empty, "empty message");
+  assert(!Heap_lock->owned_by_self(), "Heap_lock owned by requesting thread");
+
   _buffer = msg;
   while (_buffer != empty) {
    _monitor.notify();
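
[Illustration only; not part of the changeset.] The reworked CompactibleFreeListSpace::verifyChunkInFreeLists() in the first hunks above probes three places in a fixed order: the small linear allocation block first, then the size-indexed free lists for chunks smaller than IndexSetSize, and finally the binary tree dictionary for everything larger. The standalone C++ sketch below mirrors only that control flow; the names and types (kIndexSetSize, Chunk, FreeSpace, indexed, dictionary) are simplified stand-ins invented for this note, not HotSpot's actual classes.

#include <cstddef>
#include <map>
#include <set>
#include <vector>

// Hypothetical, simplified model of a free-list space; not HotSpot code.
const std::size_t kIndexSetSize = 257;   // chunks below this size live in the indexed lists

struct Chunk {
  std::size_t size;
};

struct FreeSpace {
  const Chunk* linear_alloc_block;                           // current linear allocation block (may be NULL)
  std::vector< std::set<const Chunk*> > indexed;             // one bucket per small chunk size
  std::map<std::size_t, std::set<const Chunk*> > dictionary; // larger free chunks, keyed by size

  FreeSpace() : linear_alloc_block(NULL), indexed(kIndexSetSize) {}

  // Mirrors the patched lookup order: linear allocation block first,
  // then the size-indexed lists, then the dictionary.
  bool verify_chunk_in_free_lists(const Chunk* fc) const {
    if (fc == linear_alloc_block) {
      return true;
    } else if (fc->size < kIndexSetSize) {
      return indexed[fc->size].count(fc) != 0;
    } else {
      std::map<std::size_t, std::set<const Chunk*> >::const_iterator it =
          dictionary.find(fc->size);
      return it != dictionary.end() && it->second.count(fc) != 0;
    }
  }
};

Checking the linear allocation block before either list mirrors the patched order, presumably because a chunk currently backing linear allocation is not linked on any free list and would otherwise fail verification.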