# HG changeset patch # User dcubed # Date 1370523393 25200 # Node ID f8c8cace25ad72da7b2f0748270c6a394b15b437 # Parent 04551f4dbdb9da1ecb6b9bb41c831ede07e2e18c# Parent 6bf8b8bb7c19316ce03748d10f40f254def85769 Merge diff -r 6bf8b8bb7c19 -r f8c8cace25ad make/excludeSrc.make --- a/make/excludeSrc.make Wed Jun 05 14:12:49 2013 -0400 +++ b/make/excludeSrc.make Thu Jun 06 05:56:33 2013 -0700 @@ -87,7 +87,7 @@ g1BlockOffsetTable.cpp g1CardCounts.cpp g1CollectedHeap.cpp g1CollectorPolicy.cpp \ g1ErgoVerbose.cpp g1GCPhaseTimes.cpp g1HRPrinter.cpp g1HotCardCache.cpp g1Log.cpp \ g1MMUTracker.cpp g1MarkSweep.cpp g1MemoryPool.cpp g1MonitoringSupport.cpp \ - g1RemSet.cpp g1SATBCardTableModRefBS.cpp g1_globals.cpp heapRegion.cpp \ + g1RemSet.cpp g1RemSetSummary.cpp g1SATBCardTableModRefBS.cpp g1_globals.cpp heapRegion.cpp \ heapRegionRemSet.cpp heapRegionSeq.cpp heapRegionSet.cpp heapRegionSets.cpp \ ptrQueue.cpp satbQueue.cpp sparsePRT.cpp survRateGroup.cpp vm_operations_g1.cpp \ adjoiningGenerations.cpp adjoiningVirtualSpaces.cpp asPSOldGen.cpp asPSYoungGen.cpp \ diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/os/bsd/vm/os_bsd.cpp --- a/src/os/bsd/vm/os_bsd.cpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/os/bsd/vm/os_bsd.cpp Thu Jun 06 05:56:33 2013 -0700 @@ -935,10 +935,10 @@ return (1000 * 1000); } -// XXX: For now, code this as if BSD does not support vtime. -bool os::supports_vtime() { return false; } +bool os::supports_vtime() { return true; } bool os::enable_vtime() { return false; } bool os::vtime_enabled() { return false; } + double os::elapsedVTime() { // better than nothing, but not much return elapsedTime(); diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/os/linux/vm/os_linux.cpp --- a/src/os/linux/vm/os_linux.cpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/os/linux/vm/os_linux.cpp Thu Jun 06 05:56:33 2013 -0700 @@ -101,6 +101,12 @@ # include # include +// if RUSAGE_THREAD for getrusage() has not been defined, do it here. The code calling +// getrusage() is prepared to handle the associated failure. +#ifndef RUSAGE_THREAD +#define RUSAGE_THREAD (1) /* only the calling thread */ +#endif + #define MAX_PATH (2 * K) // for timer info max values which include all bits @@ -1336,15 +1342,19 @@ return (1000 * 1000); } -// For now, we say that linux does not support vtime. I have no idea -// whether it can actually be made to (DLD, 9/13/05). - -bool os::supports_vtime() { return false; } +bool os::supports_vtime() { return true; } bool os::enable_vtime() { return false; } bool os::vtime_enabled() { return false; } + double os::elapsedVTime() { - // better than nothing, but not much - return elapsedTime(); + struct rusage usage; + int retval = getrusage(RUSAGE_THREAD, &usage); + if (retval == 0) { + return (double) (usage.ru_utime.tv_sec + usage.ru_stime.tv_sec) + (double) (usage.ru_utime.tv_usec + usage.ru_stime.tv_usec) / (1000 * 1000); + } else { + // better than nothing, but not much + return elapsedTime(); + } } jlong os::javaTimeMillis() { diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/os/windows/vm/os_windows.cpp --- a/src/os/windows/vm/os_windows.cpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/os/windows/vm/os_windows.cpp Thu Jun 06 05:56:33 2013 -0700 @@ -813,15 +813,21 @@ return result; } -// For now, we say that Windows does not support vtime. I have no idea -// whether it can actually be made to (DLD, 9/13/05). - -bool os::supports_vtime() { return false; } +bool os::supports_vtime() { return true; } bool os::enable_vtime() { return false; } bool os::vtime_enabled() { return false; } + double os::elapsedVTime() { - // better than nothing, but not much - return elapsedTime(); + FILETIME created; + FILETIME exited; + FILETIME kernel; + FILETIME user; + if (GetThreadTimes(GetCurrentThread(), &created, &exited, &kernel, &user) != 0) { + // the resolution of windows_to_java_time() should be sufficient (ms) + return (double) (windows_to_java_time(kernel) + windows_to_java_time(user)) / MILLIUNITS; + } else { + return elapsedTime(); + } } jlong os::javaTimeMillis() { diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Thu Jun 06 05:56:33 2013 -0700 @@ -114,6 +114,14 @@ } } +void ConcurrentG1Refine::worker_threads_do(ThreadClosure * tc) { + if (_threads != NULL) { + for (int i = 0; i < worker_thread_num(); i++) { + tc->do_thread(_threads[i]); + } + } +} + int ConcurrentG1Refine::thread_num() { int n_threads = (G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads : ParallelGCThreads; @@ -126,3 +134,7 @@ st->cr(); } } + +ConcurrentG1RefineThread * ConcurrentG1Refine::sampling_thread() const { + return _threads[worker_thread_num()]; +} diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp Thu Jun 06 05:56:33 2013 -0700 @@ -35,6 +35,7 @@ class G1CollectedHeap; class G1HotCardCache; class G1RemSet; +class DirtyCardQueue; class ConcurrentG1Refine: public CHeapObj { ConcurrentG1RefineThread** _threads; @@ -78,9 +79,15 @@ void reinitialize_threads(); - // Iterate over the conc refine threads + // Iterate over all concurrent refinement threads void threads_do(ThreadClosure *tc); + // Iterate over all worker refinement threads + void worker_threads_do(ThreadClosure * tc); + + // The RS sampling thread + ConcurrentG1RefineThread * sampling_thread() const; + static int thread_num(); void print_worker_threads_on(outputStream* st) const; diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Jun 06 05:56:33 2013 -0700 @@ -1417,8 +1417,6 @@ MemoryService::track_memory_usage(); - verify_after_gc(); - assert(!ref_processor_stw()->discovery_enabled(), "Postcondition"); ref_processor_stw()->verify_no_references_recorded(); @@ -1521,6 +1519,8 @@ _hrs.verify_optional(); verify_region_sets_optional(); + verify_after_gc(); + // Start a new incremental collection set for the next pause assert(g1_policy()->collection_set() == NULL, "must be"); g1_policy()->start_incremental_cset_building(); @@ -3539,6 +3539,14 @@ } void G1CollectedHeap::gc_epilogue(bool full /* Ignored */) { + + if (G1SummarizeRSetStats && + (G1SummarizeRSetStatsPeriod > 0) && + // we are at the end of the GC. Total collections has already been increased. + ((total_collections() - 1) % G1SummarizeRSetStatsPeriod == 0)) { + g1_rem_set()->print_periodic_summary_info(); + } + // FIXME: what is this about? // I'm ignoring the "fill_newgen()" call if "alloc_event_enabled" // is set. @@ -4093,12 +4101,6 @@ g1mm()->update_sizes(); } - if (G1SummarizeRSetStats && - (G1SummarizeRSetStatsPeriod > 0) && - (total_collections() % G1SummarizeRSetStatsPeriod == 0)) { - g1_rem_set()->print_summary_info(); - } - // It should now be safe to tell the concurrent mark thread to start // without its logging output interfering with the logging output // that came from the pause. diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Thu Jun 06 05:56:33 2013 -0700 @@ -593,11 +593,6 @@ // may not be a humongous - it must fit into a single heap region. HeapWord* par_allocate_during_gc(GCAllocPurpose purpose, size_t word_size); - HeapWord* allocate_during_gc_slow(GCAllocPurpose purpose, - HeapRegion* alloc_region, - bool par, - size_t word_size); - // Ensure that no further allocations can happen in "r", bearing in mind // that parallel threads might be attempting allocations. void par_allocate_remaining_space(HeapRegion* r); @@ -1733,6 +1728,95 @@ ParGCAllocBuffer::retire(end_of_gc, retain); _retired = true; } + + bool is_retired() { + return _retired; + } +}; + +class G1ParGCAllocBufferContainer { +protected: + static int const _priority_max = 2; + G1ParGCAllocBuffer* _priority_buffer[_priority_max]; + +public: + G1ParGCAllocBufferContainer(size_t gclab_word_size) { + for (int pr = 0; pr < _priority_max; ++pr) { + _priority_buffer[pr] = new G1ParGCAllocBuffer(gclab_word_size); + } + } + + ~G1ParGCAllocBufferContainer() { + for (int pr = 0; pr < _priority_max; ++pr) { + assert(_priority_buffer[pr]->is_retired(), "alloc buffers should all retire at this point."); + delete _priority_buffer[pr]; + } + } + + HeapWord* allocate(size_t word_sz) { + HeapWord* obj; + for (int pr = 0; pr < _priority_max; ++pr) { + obj = _priority_buffer[pr]->allocate(word_sz); + if (obj != NULL) return obj; + } + return obj; + } + + bool contains(void* addr) { + for (int pr = 0; pr < _priority_max; ++pr) { + if (_priority_buffer[pr]->contains(addr)) return true; + } + return false; + } + + void undo_allocation(HeapWord* obj, size_t word_sz) { + bool finish_undo; + for (int pr = 0; pr < _priority_max; ++pr) { + if (_priority_buffer[pr]->contains(obj)) { + _priority_buffer[pr]->undo_allocation(obj, word_sz); + finish_undo = true; + } + } + if (!finish_undo) ShouldNotReachHere(); + } + + size_t words_remaining() { + size_t result = 0; + for (int pr = 0; pr < _priority_max; ++pr) { + result += _priority_buffer[pr]->words_remaining(); + } + return result; + } + + size_t words_remaining_in_retired_buffer() { + G1ParGCAllocBuffer* retired = _priority_buffer[0]; + return retired->words_remaining(); + } + + void flush_stats_and_retire(PLABStats* stats, bool end_of_gc, bool retain) { + for (int pr = 0; pr < _priority_max; ++pr) { + _priority_buffer[pr]->flush_stats_and_retire(stats, end_of_gc, retain); + } + } + + void update(bool end_of_gc, bool retain, HeapWord* buf, size_t word_sz) { + G1ParGCAllocBuffer* retired_and_set = _priority_buffer[0]; + retired_and_set->retire(end_of_gc, retain); + retired_and_set->set_buf(buf); + retired_and_set->set_word_size(word_sz); + adjust_priority_order(); + } + +private: + void adjust_priority_order() { + G1ParGCAllocBuffer* retired_and_set = _priority_buffer[0]; + + int last = _priority_max - 1; + for (int pr = 0; pr < last; ++pr) { + _priority_buffer[pr] = _priority_buffer[pr + 1]; + } + _priority_buffer[last] = retired_and_set; + } }; class G1ParScanThreadState : public StackObj { @@ -1743,9 +1827,9 @@ CardTableModRefBS* _ct_bs; G1RemSet* _g1_rem; - G1ParGCAllocBuffer _surviving_alloc_buffer; - G1ParGCAllocBuffer _tenured_alloc_buffer; - G1ParGCAllocBuffer* _alloc_buffers[GCAllocPurposeCount]; + G1ParGCAllocBufferContainer _surviving_alloc_buffer; + G1ParGCAllocBufferContainer _tenured_alloc_buffer; + G1ParGCAllocBufferContainer* _alloc_buffers[GCAllocPurposeCount]; ageTable _age_table; size_t _alloc_buffer_waste; @@ -1809,7 +1893,7 @@ RefToScanQueue* refs() { return _refs; } ageTable* age_table() { return &_age_table; } - G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) { + G1ParGCAllocBufferContainer* alloc_buffer(GCAllocPurpose purpose) { return _alloc_buffers[purpose]; } @@ -1839,15 +1923,13 @@ HeapWord* obj = NULL; size_t gclab_word_size = _g1h->desired_plab_sz(purpose); if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) { - G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose); - add_to_alloc_buffer_waste(alloc_buf->words_remaining()); - alloc_buf->retire(false /* end_of_gc */, false /* retain */); + G1ParGCAllocBufferContainer* alloc_buf = alloc_buffer(purpose); HeapWord* buf = _g1h->par_allocate_during_gc(purpose, gclab_word_size); if (buf == NULL) return NULL; // Let caller handle allocation failure. - // Otherwise. - alloc_buf->set_word_size(gclab_word_size); - alloc_buf->set_buf(buf); + + add_to_alloc_buffer_waste(alloc_buf->words_remaining_in_retired_buffer()); + alloc_buf->update(false /* end_of_gc */, false /* retain */, buf, gclab_word_size); obj = alloc_buf->allocate(word_sz); assert(obj != NULL, "buffer was definitely big enough..."); @@ -1959,7 +2041,6 @@ } } -public: void trim_queue(); }; diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/g1/g1RemSet.cpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Thu Jun 06 05:56:33 2013 -0700 @@ -34,6 +34,7 @@ #include "gc_implementation/g1/g1OopClosures.inline.hpp" #include "gc_implementation/g1/g1RemSet.inline.hpp" #include "gc_implementation/g1/heapRegionSeq.inline.hpp" +#include "gc_implementation/g1/heapRegionRemSet.hpp" #include "memory/iterator.hpp" #include "oops/oop.inline.hpp" #include "utilities/intHisto.hpp" @@ -73,7 +74,8 @@ _ct_bs(ct_bs), _g1p(_g1->g1_policy()), _cg1r(g1->concurrent_g1_refine()), _cset_rs_update_cl(NULL), - _cards_scanned(NULL), _total_cards_scanned(0) + _cards_scanned(NULL), _total_cards_scanned(0), + _prev_period_summary() { _seq_task = new SubTasksDone(NumSeqTasks); guarantee(n_workers() > 0, "There should be some workers"); @@ -81,6 +83,7 @@ for (uint i = 0; i < n_workers(); i++) { _cset_rs_update_cl[i] = NULL; } + _prev_period_summary.initialize(this, n_workers()); } G1RemSet::~G1RemSet() { @@ -697,47 +700,29 @@ return has_refs_into_cset; } -class HRRSStatsIter: public HeapRegionClosure { - size_t _occupied; - size_t _total_mem_sz; - size_t _max_mem_sz; - HeapRegion* _max_mem_sz_region; -public: - HRRSStatsIter() : - _occupied(0), - _total_mem_sz(0), - _max_mem_sz(0), - _max_mem_sz_region(NULL) - {} +void G1RemSet::print_periodic_summary_info() { + G1RemSetSummary current; + current.initialize(this, n_workers()); - bool doHeapRegion(HeapRegion* r) { - if (r->continuesHumongous()) return false; - size_t mem_sz = r->rem_set()->mem_size(); - if (mem_sz > _max_mem_sz) { - _max_mem_sz = mem_sz; - _max_mem_sz_region = r; - } - _total_mem_sz += mem_sz; - size_t occ = r->rem_set()->occupied(); - _occupied += occ; - return false; - } - size_t total_mem_sz() { return _total_mem_sz; } - size_t max_mem_sz() { return _max_mem_sz; } - size_t occupied() { return _occupied; } - HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; } -}; + _prev_period_summary.subtract_from(¤t); + print_summary_info(&_prev_period_summary); -class PrintRSThreadVTimeClosure : public ThreadClosure { -public: - virtual void do_thread(Thread *t) { - ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t; - gclog_or_tty->print(" %5.2f", crt->vtime_accum()); - } -}; + _prev_period_summary.set(¤t); +} void G1RemSet::print_summary_info() { - G1CollectedHeap* g1 = G1CollectedHeap::heap(); + G1RemSetSummary current; + current.initialize(this, n_workers()); + + print_summary_info(¤t, " Cumulative RS summary"); +} + +void G1RemSet::print_summary_info(G1RemSetSummary * summary, const char * header) { + assert(summary != NULL, "just checking"); + + if (header != NULL) { + gclog_or_tty->print_cr("%s", header); + } #if CARD_REPEAT_HISTO gclog_or_tty->print_cr("\nG1 card_repeat count histogram: "); @@ -745,52 +730,13 @@ card_repeat_count.print_on(gclog_or_tty); #endif - gclog_or_tty->print_cr("\n Concurrent RS processed %d cards", - _conc_refine_cards); - DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); - jint tot_processed_buffers = - dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread(); - gclog_or_tty->print_cr(" Of %d completed buffers:", tot_processed_buffers); - gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS threads.", - dcqs.processed_buffers_rs_thread(), - 100.0*(float)dcqs.processed_buffers_rs_thread()/ - (float)tot_processed_buffers); - gclog_or_tty->print_cr(" %8d (%5.1f%%) by mutator threads.", - dcqs.processed_buffers_mut(), - 100.0*(float)dcqs.processed_buffers_mut()/ - (float)tot_processed_buffers); - gclog_or_tty->print_cr(" Conc RS threads times(s)"); - PrintRSThreadVTimeClosure p; - gclog_or_tty->print(" "); - g1->concurrent_g1_refine()->threads_do(&p); - gclog_or_tty->print_cr(""); - - HRRSStatsIter blk; - g1->heap_region_iterate(&blk); - gclog_or_tty->print_cr(" Total heap region rem set sizes = "SIZE_FORMAT"K." - " Max = "SIZE_FORMAT"K.", - blk.total_mem_sz()/K, blk.max_mem_sz()/K); - gclog_or_tty->print_cr(" Static structures = "SIZE_FORMAT"K," - " free_lists = "SIZE_FORMAT"K.", - HeapRegionRemSet::static_mem_size() / K, - HeapRegionRemSet::fl_mem_size() / K); - gclog_or_tty->print_cr(" "SIZE_FORMAT" occupied cards represented.", - blk.occupied()); - HeapRegion* max_mem_sz_region = blk.max_mem_sz_region(); - HeapRegionRemSet* rem_set = max_mem_sz_region->rem_set(); - gclog_or_tty->print_cr(" Max size region = "HR_FORMAT", " - "size = "SIZE_FORMAT "K, occupied = "SIZE_FORMAT"K.", - HR_FORMAT_PARAMS(max_mem_sz_region), - (rem_set->mem_size() + K - 1)/K, - (rem_set->occupied() + K - 1)/K); - gclog_or_tty->print_cr(" Did %d coarsenings.", - HeapRegionRemSet::n_coarsenings()); + summary->print_on(gclog_or_tty); } void G1RemSet::prepare_for_verify() { if (G1HRRSFlushLogBuffersOnVerify && (VerifyBeforeGC || VerifyAfterGC) - && !_g1->full_collection()) { + && (!_g1->full_collection() || G1VerifyRSetsDuringFullGC)) { cleanupHRRS(); _g1->set_refine_cte_cl_concurrency(false); if (SafepointSynchronize::is_at_safepoint()) { diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/g1/g1RemSet.hpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp Thu Jun 06 05:56:33 2013 -0700 @@ -25,6 +25,8 @@ #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSET_HPP #define SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSET_HPP +#include "gc_implementation/g1/g1RemSetSummary.hpp" + // A G1RemSet provides ways of iterating over pointers into a selected // collection set. @@ -37,9 +39,11 @@ // so that they can be used to update the individual region remsets. class G1RemSet: public CHeapObj { +private: + G1RemSetSummary _prev_period_summary; protected: G1CollectedHeap* _g1; - unsigned _conc_refine_cards; + size_t _conc_refine_cards; uint n_workers(); protected: @@ -66,6 +70,8 @@ // references into the collection set. OopsInHeapRegionClosure** _cset_rs_update_cl; + // Print the given summary info + virtual void print_summary_info(G1RemSetSummary * summary, const char * header = NULL); public: // This is called to reset dual hash tables after the gc pause // is finished and the initial hash table is no longer being @@ -123,11 +129,18 @@ int worker_i, bool check_for_refs_into_cset); - // Print any relevant summary info. + // Print accumulated summary info from the start of the VM. virtual void print_summary_info(); + // Print accumulated summary info from the last time called. + virtual void print_periodic_summary_info(); + // Prepare remembered set for verification. virtual void prepare_for_verify(); + + size_t conc_refine_cards() const { + return _conc_refine_cards; + } }; class CountNonCleanMemRegionClosure: public MemRegionClosure { diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/g1/g1RemSetSummary.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1RemSetSummary.cpp Thu Jun 06 05:56:33 2013 -0700 @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc_implementation/g1/concurrentG1Refine.hpp" +#include "gc_implementation/g1/concurrentG1RefineThread.hpp" +#include "gc_implementation/g1/heapRegion.hpp" +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1RemSet.inline.hpp" +#include "gc_implementation/g1/g1RemSetSummary.hpp" +#include "gc_implementation/g1/heapRegionRemSet.hpp" +#include "runtime/thread.inline.hpp" + +class GetRSThreadVTimeClosure : public ThreadClosure { +private: + G1RemSetSummary* _summary; + uint _counter; + +public: + GetRSThreadVTimeClosure(G1RemSetSummary * summary) : ThreadClosure(), _summary(summary), _counter(0) { + assert(_summary != NULL, "just checking"); + } + + virtual void do_thread(Thread* t) { + ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t; + _summary->set_rs_thread_vtime(_counter, crt->vtime_accum()); + _counter++; + } +}; + +void G1RemSetSummary::update() { + _num_refined_cards = remset()->conc_refine_cards(); + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + _num_processed_buf_mutator = dcqs.processed_buffers_mut(); + _num_processed_buf_rs_threads = dcqs.processed_buffers_rs_thread(); + + _num_coarsenings = HeapRegionRemSet::n_coarsenings(); + + ConcurrentG1Refine * cg1r = G1CollectedHeap::heap()->concurrent_g1_refine(); + if (_rs_threads_vtimes != NULL) { + GetRSThreadVTimeClosure p(this); + cg1r->worker_threads_do(&p); + } + set_sampling_thread_vtime(cg1r->sampling_thread()->vtime_accum()); +} + +void G1RemSetSummary::set_rs_thread_vtime(uint thread, double value) { + assert(_rs_threads_vtimes != NULL, "just checking"); + assert(thread < _num_vtimes, "just checking"); + _rs_threads_vtimes[thread] = value; +} + +double G1RemSetSummary::rs_thread_vtime(uint thread) const { + assert(_rs_threads_vtimes != NULL, "just checking"); + assert(thread < _num_vtimes, "just checking"); + return _rs_threads_vtimes[thread]; +} + +void G1RemSetSummary::initialize(G1RemSet* remset, uint num_workers) { + assert(_rs_threads_vtimes == NULL, "just checking"); + assert(remset != NULL, "just checking"); + + _remset = remset; + _num_vtimes = num_workers; + _rs_threads_vtimes = NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC); + memset(_rs_threads_vtimes, 0, sizeof(double) * _num_vtimes); + + update(); +} + +void G1RemSetSummary::set(G1RemSetSummary* other) { + assert(other != NULL, "just checking"); + assert(remset() == other->remset(), "just checking"); + assert(_num_vtimes == other->_num_vtimes, "just checking"); + + _num_refined_cards = other->num_concurrent_refined_cards(); + + _num_processed_buf_mutator = other->num_processed_buf_mutator(); + _num_processed_buf_rs_threads = other->num_processed_buf_rs_threads(); + + _num_coarsenings = other->_num_coarsenings; + + memcpy(_rs_threads_vtimes, other->_rs_threads_vtimes, sizeof(double) * _num_vtimes); + + set_sampling_thread_vtime(other->sampling_thread_vtime()); +} + +void G1RemSetSummary::subtract_from(G1RemSetSummary* other) { + assert(other != NULL, "just checking"); + assert(remset() == other->remset(), "just checking"); + assert(_num_vtimes == other->_num_vtimes, "just checking"); + + _num_refined_cards = other->num_concurrent_refined_cards() - _num_refined_cards; + + _num_processed_buf_mutator = other->num_processed_buf_mutator() - _num_processed_buf_mutator; + _num_processed_buf_rs_threads = other->num_processed_buf_rs_threads() - _num_processed_buf_rs_threads; + + _num_coarsenings = other->num_coarsenings() - _num_coarsenings; + + for (uint i = 0; i < _num_vtimes; i++) { + set_rs_thread_vtime(i, other->rs_thread_vtime(i) - rs_thread_vtime(i)); + } + + _sampling_thread_vtime = other->sampling_thread_vtime() - _sampling_thread_vtime; +} + +class HRRSStatsIter: public HeapRegionClosure { + size_t _occupied; + size_t _total_mem_sz; + size_t _max_mem_sz; + HeapRegion* _max_mem_sz_region; +public: + HRRSStatsIter() : + _occupied(0), + _total_mem_sz(0), + _max_mem_sz(0), + _max_mem_sz_region(NULL) + {} + + bool doHeapRegion(HeapRegion* r) { + size_t mem_sz = r->rem_set()->mem_size(); + if (mem_sz > _max_mem_sz) { + _max_mem_sz = mem_sz; + _max_mem_sz_region = r; + } + _total_mem_sz += mem_sz; + size_t occ = r->rem_set()->occupied(); + _occupied += occ; + return false; + } + size_t total_mem_sz() { return _total_mem_sz; } + size_t max_mem_sz() { return _max_mem_sz; } + size_t occupied() { return _occupied; } + HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; } +}; + +double calc_percentage(size_t numerator, size_t denominator) { + if (denominator != 0) { + return (double)numerator / denominator * 100.0; + } else { + return 0.0f; + } +} + +void G1RemSetSummary::print_on(outputStream* out) { + out->print_cr("\n Concurrent RS processed "SIZE_FORMAT" cards", + num_concurrent_refined_cards()); + out->print_cr(" Of %d completed buffers:", num_processed_buf_total()); + out->print_cr(" %8d (%5.1f%%) by concurrent RS threads.", + num_processed_buf_total(), + calc_percentage(num_processed_buf_rs_threads(), num_processed_buf_total())); + out->print_cr(" %8d (%5.1f%%) by mutator threads.", + num_processed_buf_mutator(), + calc_percentage(num_processed_buf_mutator(), num_processed_buf_total())); + out->print_cr(" Concurrent RS threads times (s)"); + out->print(" "); + for (uint i = 0; i < _num_vtimes; i++) { + out->print(" %5.2f", rs_thread_vtime(i)); + } + out->cr(); + out->print_cr(" Concurrent sampling threads times (s)"); + out->print_cr(" %5.2f", sampling_thread_vtime()); + + HRRSStatsIter blk; + G1CollectedHeap::heap()->heap_region_iterate(&blk); + out->print_cr(" Total heap region rem set sizes = "SIZE_FORMAT"K." + " Max = "SIZE_FORMAT"K.", + blk.total_mem_sz()/K, blk.max_mem_sz()/K); + out->print_cr(" Static structures = "SIZE_FORMAT"K," + " free_lists = "SIZE_FORMAT"K.", + HeapRegionRemSet::static_mem_size() / K, + HeapRegionRemSet::fl_mem_size() / K); + out->print_cr(" "SIZE_FORMAT" occupied cards represented.", + blk.occupied()); + HeapRegion* max_mem_sz_region = blk.max_mem_sz_region(); + HeapRegionRemSet* rem_set = max_mem_sz_region->rem_set(); + out->print_cr(" Max size region = "HR_FORMAT", " + "size = "SIZE_FORMAT "K, occupied = "SIZE_FORMAT"K.", + HR_FORMAT_PARAMS(max_mem_sz_region), + (rem_set->mem_size() + K - 1)/K, + (rem_set->occupied() + K - 1)/K); + + out->print_cr(" Did %d coarsenings.", num_coarsenings()); +} diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/g1/g1RemSetSummary.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1RemSetSummary.hpp Thu Jun 06 05:56:33 2013 -0700 @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSETSUMMARY_HPP +#define SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSETSUMMARY_HPP + +#include "utilities/ostream.hpp" + +class G1RemSet; + +// A G1RemSetSummary manages statistical information about the G1RemSet + +class G1RemSetSummary VALUE_OBJ_CLASS_SPEC { +private: + friend class GetRSThreadVTimeClosure; + + G1RemSet* _remset; + + G1RemSet* remset() const { + return _remset; + } + + size_t _num_refined_cards; + size_t _num_processed_buf_mutator; + size_t _num_processed_buf_rs_threads; + + size_t _num_coarsenings; + + double* _rs_threads_vtimes; + size_t _num_vtimes; + + double _sampling_thread_vtime; + + void set_rs_thread_vtime(uint thread, double value); + void set_sampling_thread_vtime(double value) { + _sampling_thread_vtime = value; + } + + void free_and_null() { + if (_rs_threads_vtimes) { + FREE_C_HEAP_ARRAY(double, _rs_threads_vtimes, mtGC); + _rs_threads_vtimes = NULL; + _num_vtimes = 0; + } + } + + // update this summary with current data from various places + void update(); + +public: + G1RemSetSummary() : _remset(NULL), _num_refined_cards(0), + _num_processed_buf_mutator(0), _num_processed_buf_rs_threads(0), _num_coarsenings(0), + _rs_threads_vtimes(NULL), _num_vtimes(0), _sampling_thread_vtime(0.0f) { + } + + ~G1RemSetSummary() { + free_and_null(); + } + + // set the counters in this summary to the values of the others + void set(G1RemSetSummary* other); + // subtract all counters from the other summary, and set them in the current + void subtract_from(G1RemSetSummary* other); + + // initialize and get the first sampling + void initialize(G1RemSet* remset, uint num_workers); + + void print_on(outputStream* out); + + double rs_thread_vtime(uint thread) const; + + double sampling_thread_vtime() const { + return _sampling_thread_vtime; + } + + size_t num_concurrent_refined_cards() const { + return _num_refined_cards; + } + + size_t num_processed_buf_mutator() const { + return _num_processed_buf_mutator; + } + + size_t num_processed_buf_rs_threads() const { + return _num_processed_buf_rs_threads; + } + + size_t num_processed_buf_total() const { + return num_processed_buf_mutator() + num_processed_buf_rs_threads(); + } + + size_t num_coarsenings() const { + return _num_coarsenings; + } +}; + +#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSETSUMMARY_HPP diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/g1/g1_globals.hpp --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Thu Jun 06 05:56:33 2013 -0700 @@ -329,7 +329,11 @@ \ develop(bool, G1EvacuationFailureALotDuringMixedGC, true, \ "Force use of evacuation failure handling during mixed " \ - "evacuation pauses") + "evacuation pauses") \ + \ + diagnostic(bool, G1VerifyRSetsDuringFullGC, false, \ + "If true, perform verification of each heap region's " \ + "remembered set when verifying the heap during a full GC.") G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG) diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/g1/heapRegion.cpp --- a/src/share/vm/gc_implementation/g1/heapRegion.cpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp Thu Jun 06 05:56:33 2013 -0700 @@ -139,7 +139,7 @@ _n_failures++; } - if (!_g1h->full_collection()) { + if (!_g1h->full_collection() || G1VerifyRSetsDuringFullGC) { HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p); HeapRegion* to = _g1h->heap_region_containing(obj); if (from != NULL && to != NULL && diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Thu Jun 06 05:56:33 2013 -0700 @@ -116,7 +116,7 @@ // The alignment used for eden and survivors within the young gen // and for boundary between young gen and old gen. - size_t intra_heap_alignment() const { return 64 * K; } + size_t intra_heap_alignment() const { return 64 * K * HeapWordSize; } size_t capacity() const; size_t used() const; diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Thu Jun 06 05:56:33 2013 -0700 @@ -59,13 +59,25 @@ #include // All sizes are in HeapWords. -const size_t ParallelCompactData::Log2RegionSize = 9; // 512 words +const size_t ParallelCompactData::Log2RegionSize = 16; // 64K words const size_t ParallelCompactData::RegionSize = (size_t)1 << Log2RegionSize; const size_t ParallelCompactData::RegionSizeBytes = RegionSize << LogHeapWordSize; const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1; const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1; -const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask; +const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask; + +const size_t ParallelCompactData::Log2BlockSize = 7; // 128 words +const size_t ParallelCompactData::BlockSize = (size_t)1 << Log2BlockSize; +const size_t ParallelCompactData::BlockSizeBytes = + BlockSize << LogHeapWordSize; +const size_t ParallelCompactData::BlockSizeOffsetMask = BlockSize - 1; +const size_t ParallelCompactData::BlockAddrOffsetMask = BlockSizeBytes - 1; +const size_t ParallelCompactData::BlockAddrMask = ~BlockAddrOffsetMask; + +const size_t ParallelCompactData::BlocksPerRegion = RegionSize / BlockSize; +const size_t ParallelCompactData::Log2BlocksPerRegion = + Log2RegionSize - Log2BlockSize; const ParallelCompactData::RegionData::region_sz_t ParallelCompactData::RegionData::dc_shift = 27; @@ -359,6 +371,10 @@ _reserved_byte_size = 0; _region_data = 0; _region_count = 0; + + _block_vspace = 0; + _block_data = 0; + _block_count = 0; } bool ParallelCompactData::initialize(MemRegion covered_region) @@ -372,8 +388,7 @@ assert((region_size & RegionSizeOffsetMask) == 0, "region size not a multiple of RegionSize"); - bool result = initialize_region_data(region_size); - + bool result = initialize_region_data(region_size) && initialize_block_data(); return result; } @@ -418,17 +433,36 @@ return false; } +bool ParallelCompactData::initialize_block_data() +{ + assert(_region_count != 0, "region data must be initialized first"); + const size_t count = _region_count << Log2BlocksPerRegion; + _block_vspace = create_vspace(count, sizeof(BlockData)); + if (_block_vspace != 0) { + _block_data = (BlockData*)_block_vspace->reserved_low_addr(); + _block_count = count; + return true; + } + return false; +} + void ParallelCompactData::clear() { memset(_region_data, 0, _region_vspace->committed_size()); + memset(_block_data, 0, _block_vspace->committed_size()); } void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) { assert(beg_region <= _region_count, "beg_region out of range"); assert(end_region <= _region_count, "end_region out of range"); + assert(RegionSize % BlockSize == 0, "RegionSize not a multiple of BlockSize"); const size_t region_cnt = end_region - beg_region; memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData)); + + const size_t beg_block = beg_region * BlocksPerRegion; + const size_t block_cnt = region_cnt * BlocksPerRegion; + memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData)); } HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const @@ -707,49 +741,48 @@ HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) { assert(addr != NULL, "Should detect NULL oop earlier"); - assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap"); -#ifdef ASSERT - if (PSParallelCompact::mark_bitmap()->is_unmarked(addr)) { - gclog_or_tty->print_cr("calc_new_pointer:: addr " PTR_FORMAT, addr); - } -#endif - assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked"); + assert(PSParallelCompact::gc_heap()->is_in(addr), "not in heap"); + assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "not marked"); // Region covering the object. - size_t region_index = addr_to_region_idx(addr); - const RegionData* const region_ptr = region(region_index); - HeapWord* const region_addr = region_align_down(addr); - - assert(addr < region_addr + RegionSize, "Region does not cover object"); - assert(addr_to_region_ptr(region_addr) == region_ptr, "sanity check"); - + RegionData* const region_ptr = addr_to_region_ptr(addr); HeapWord* result = region_ptr->destination(); - // If all the data in the region is live, then the new location of the object - // can be calculated from the destination of the region plus the offset of the - // object in the region. + // If the entire Region is live, the new location is region->destination + the + // offset of the object within in the Region. + + // Run some performance tests to determine if this special case pays off. It + // is worth it for pointers into the dense prefix. If the optimization to + // avoid pointer updates in regions that only point to the dense prefix is + // ever implemented, this should be revisited. if (region_ptr->data_size() == RegionSize) { - result += pointer_delta(addr, region_addr); - DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);) + result += region_offset(addr); return result; } - // The new location of the object is - // region destination + - // size of the partial object extending onto the region + - // sizes of the live objects in the Region that are to the left of addr - const size_t partial_obj_size = region_ptr->partial_obj_size(); - HeapWord* const search_start = region_addr + partial_obj_size; + // Otherwise, the new location is region->destination + block offset + the + // number of live words in the Block that are (a) to the left of addr and (b) + // due to objects that start in the Block. + + // Fill in the block table if necessary. This is unsynchronized, so multiple + // threads may fill the block table for a region (harmless, since it is + // idempotent). + if (!region_ptr->blocks_filled()) { + PSParallelCompact::fill_blocks(addr_to_region_idx(addr)); + region_ptr->set_blocks_filled(); + } + + HeapWord* const search_start = block_align_down(addr); + const size_t block_offset = addr_to_block_ptr(addr)->offset(); const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap(); - size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr)); - - result += partial_obj_size + live_to_left; - DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);) + const size_t live = bitmap->live_words_in_range(search_start, oop(addr)); + result += block_offset + live; + DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result)); return result; } -#ifdef ASSERT +#ifdef ASSERT void ParallelCompactData::verify_clear(const PSVirtualSpace* vspace) { const size_t* const beg = (const size_t*)vspace->committed_low_addr(); @@ -762,16 +795,10 @@ void ParallelCompactData::verify_clear() { verify_clear(_region_vspace); + verify_clear(_block_vspace); } #endif // #ifdef ASSERT -#ifdef NOT_PRODUCT -ParallelCompactData::RegionData* debug_region(size_t region_index) { - ParallelCompactData& sd = PSParallelCompact::summary_data(); - return sd.region(region_index); -} -#endif - elapsedTimer PSParallelCompact::_accumulated_time; unsigned int PSParallelCompact::_total_invocations = 0; unsigned int PSParallelCompact::_maximum_compaction_gc_num = 0; @@ -1961,11 +1988,6 @@ maximum_heap_compaction); } -bool ParallelCompactData::region_contains(size_t region_index, HeapWord* addr) { - size_t addr_region_index = addr_to_region_idx(addr); - return region_index == addr_region_index; -} - // This method contains no policy. You should probably // be calling invoke() instead. bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) { @@ -2627,6 +2649,41 @@ } } +#ifdef ASSERT +// Write a histogram of the number of times the block table was filled for a +// region. +void PSParallelCompact::write_block_fill_histogram(outputStream* const out) +{ + if (!TraceParallelOldGCCompactionPhase) return; + + typedef ParallelCompactData::RegionData rd_t; + ParallelCompactData& sd = summary_data(); + + for (unsigned int id = old_space_id; id < last_space_id; ++id) { + MutableSpace* const spc = _space_info[id].space(); + if (spc->bottom() != spc->top()) { + const rd_t* const beg = sd.addr_to_region_ptr(spc->bottom()); + HeapWord* const top_aligned_up = sd.region_align_up(spc->top()); + const rd_t* const end = sd.addr_to_region_ptr(top_aligned_up); + + size_t histo[5] = { 0, 0, 0, 0, 0 }; + const size_t histo_len = sizeof(histo) / sizeof(size_t); + const size_t region_cnt = pointer_delta(end, beg, sizeof(rd_t)); + + for (const rd_t* cur = beg; cur < end; ++cur) { + ++histo[MIN2(cur->blocks_filled_count(), histo_len - 1)]; + } + out->print("%u %-4s" SIZE_FORMAT_W(5), id, space_names[id], region_cnt); + for (size_t i = 0; i < histo_len; ++i) { + out->print(" " SIZE_FORMAT_W(5) " %5.1f%%", + histo[i], 100.0 * histo[i] / region_cnt); + } + out->cr(); + } + } +} +#endif // #ifdef ASSERT + void PSParallelCompact::compact() { // trace("5"); TraceTime tm("compaction phase", print_phases(), true, gclog_or_tty); @@ -2666,6 +2723,8 @@ update_deferred_objects(cm, SpaceId(id)); } } + + DEBUG_ONLY(write_block_fill_histogram(gclog_or_tty)); } #ifdef ASSERT @@ -3130,6 +3189,57 @@ } while (true); } +void PSParallelCompact::fill_blocks(size_t region_idx) +{ + // Fill in the block table elements for the specified region. Each block + // table element holds the number of live words in the region that are to the + // left of the first object that starts in the block. Thus only blocks in + // which an object starts need to be filled. + // + // The algorithm scans the section of the bitmap that corresponds to the + // region, keeping a running total of the live words. When an object start is + // found, if it's the first to start in the block that contains it, the + // current total is written to the block table element. + const size_t Log2BlockSize = ParallelCompactData::Log2BlockSize; + const size_t Log2RegionSize = ParallelCompactData::Log2RegionSize; + const size_t RegionSize = ParallelCompactData::RegionSize; + + ParallelCompactData& sd = summary_data(); + const size_t partial_obj_size = sd.region(region_idx)->partial_obj_size(); + if (partial_obj_size >= RegionSize) { + return; // No objects start in this region. + } + + // Ensure the first loop iteration decides that the block has changed. + size_t cur_block = sd.block_count(); + + const ParMarkBitMap* const bitmap = mark_bitmap(); + + const size_t Log2BitsPerBlock = Log2BlockSize - LogMinObjAlignment; + assert((size_t)1 << Log2BitsPerBlock == + bitmap->words_to_bits(ParallelCompactData::BlockSize), "sanity"); + + size_t beg_bit = bitmap->words_to_bits(region_idx << Log2RegionSize); + const size_t range_end = beg_bit + bitmap->words_to_bits(RegionSize); + size_t live_bits = bitmap->words_to_bits(partial_obj_size); + beg_bit = bitmap->find_obj_beg(beg_bit + live_bits, range_end); + while (beg_bit < range_end) { + const size_t new_block = beg_bit >> Log2BitsPerBlock; + if (new_block != cur_block) { + cur_block = new_block; + sd.block(cur_block)->set_offset(bitmap->bits_to_words(live_bits)); + } + + const size_t end_bit = bitmap->find_obj_end(beg_bit, range_end); + if (end_bit < range_end - 1) { + live_bits += end_bit - beg_bit + 1; + beg_bit = bitmap->find_obj_beg(end_bit + 1, range_end); + } else { + return; + } + } +} + void PSParallelCompact::move_and_update(ParCompactionManager* cm, SpaceId space_id) { const MutableSpace* sp = space(space_id); diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp Thu Jun 06 05:56:33 2013 -0700 @@ -220,6 +220,17 @@ // Mask for the bits in a pointer to get the address of the start of a region. static const size_t RegionAddrMask; + static const size_t Log2BlockSize; + static const size_t BlockSize; + static const size_t BlockSizeBytes; + + static const size_t BlockSizeOffsetMask; + static const size_t BlockAddrOffsetMask; + static const size_t BlockAddrMask; + + static const size_t BlocksPerRegion; + static const size_t Log2BlocksPerRegion; + class RegionData { public: @@ -272,6 +283,12 @@ inline uint destination_count() const; inline uint destination_count_raw() const; + // Whether the block table for this region has been filled. + inline bool blocks_filled() const; + + // Number of times the block table was filled. + DEBUG_ONLY(inline size_t blocks_filled_count() const;) + // The location of the java heap data that corresponds to this region. inline HeapWord* data_location() const; @@ -296,6 +313,7 @@ void set_partial_obj_size(size_t words) { _partial_obj_size = (region_sz_t) words; } + inline void set_blocks_filled(); inline void set_destination_count(uint count); inline void set_live_obj_size(size_t words); @@ -328,7 +346,11 @@ HeapWord* _partial_obj_addr; region_sz_t _partial_obj_size; region_sz_t volatile _dc_and_los; + bool _blocks_filled; + #ifdef ASSERT + size_t _blocks_filled_count; // Number of block table fills. + // These enable optimizations that are only partially implemented. Use // debug builds to prevent the code fragments from breaking. HeapWord* _data_location; @@ -337,11 +359,26 @@ #ifdef ASSERT public: - uint _pushed; // 0 until region is pushed onto a worker's stack + uint _pushed; // 0 until region is pushed onto a stack private: #endif }; + // "Blocks" allow shorter sections of the bitmap to be searched. Each Block + // holds an offset, which is the amount of live data in the Region to the left + // of the first live object that starts in the Block. + class BlockData + { + public: + typedef unsigned short int blk_ofs_t; + + blk_ofs_t offset() const { return _offset; } + void set_offset(size_t val) { _offset = (blk_ofs_t)val; } + + private: + blk_ofs_t _offset; + }; + public: ParallelCompactData(); bool initialize(MemRegion covered_region); @@ -353,8 +390,9 @@ inline RegionData* region(size_t region_idx) const; inline size_t region(const RegionData* const region_ptr) const; - // Returns true if the given address is contained within the region - bool region_contains(size_t region_index, HeapWord* addr); + size_t block_count() const { return _block_count; } + inline BlockData* block(size_t block_idx) const; + inline size_t block(const BlockData* block_ptr) const; void add_obj(HeapWord* addr, size_t len); void add_obj(oop p, size_t len) { add_obj((HeapWord*)p, len); } @@ -394,11 +432,24 @@ inline HeapWord* region_align_up(HeapWord* addr) const; inline bool is_region_aligned(HeapWord* addr) const; + // Analogous to region_offset() for blocks. + size_t block_offset(const HeapWord* addr) const; + size_t addr_to_block_idx(const HeapWord* addr) const; + size_t addr_to_block_idx(const oop obj) const { + return addr_to_block_idx((HeapWord*) obj); + } + inline BlockData* addr_to_block_ptr(const HeapWord* addr) const; + inline HeapWord* block_to_addr(size_t block) const; + inline size_t region_to_block_idx(size_t region) const; + + inline HeapWord* block_align_down(HeapWord* addr) const; + inline HeapWord* block_align_up(HeapWord* addr) const; + inline bool is_block_aligned(HeapWord* addr) const; + // Return the address one past the end of the partial object. HeapWord* partial_obj_end(size_t region_idx) const; - // Return the new location of the object p after the - // the compaction. + // Return the location of the object after compaction. HeapWord* calc_new_pointer(HeapWord* addr); HeapWord* calc_new_pointer(oop p) { @@ -411,6 +462,7 @@ #endif // #ifdef ASSERT private: + bool initialize_block_data(); bool initialize_region_data(size_t region_size); PSVirtualSpace* create_vspace(size_t count, size_t element_size); @@ -424,6 +476,10 @@ size_t _reserved_byte_size; RegionData* _region_data; size_t _region_count; + + PSVirtualSpace* _block_vspace; + BlockData* _block_data; + size_t _block_count; }; inline uint @@ -438,6 +494,28 @@ return destination_count_raw() >> dc_shift; } +inline bool +ParallelCompactData::RegionData::blocks_filled() const +{ + return _blocks_filled; +} + +#ifdef ASSERT +inline size_t +ParallelCompactData::RegionData::blocks_filled_count() const +{ + return _blocks_filled_count; +} +#endif // #ifdef ASSERT + +inline void +ParallelCompactData::RegionData::set_blocks_filled() +{ + _blocks_filled = true; + // Debug builds count the number of times the table was filled. + DEBUG_ONLY(Atomic::inc_ptr(&_blocks_filled_count)); +} + inline void ParallelCompactData::RegionData::set_destination_count(uint count) { @@ -532,6 +610,12 @@ return pointer_delta(region_ptr, _region_data, sizeof(RegionData)); } +inline ParallelCompactData::BlockData* +ParallelCompactData::block(size_t n) const { + assert(n < block_count(), "bad arg"); + return _block_data + n; +} + inline size_t ParallelCompactData::region_offset(const HeapWord* addr) const { @@ -598,6 +682,63 @@ return region_offset(addr) == 0; } +inline size_t +ParallelCompactData::block_offset(const HeapWord* addr) const +{ + assert(addr >= _region_start, "bad addr"); + assert(addr <= _region_end, "bad addr"); + return (size_t(addr) & BlockAddrOffsetMask) >> LogHeapWordSize; +} + +inline size_t +ParallelCompactData::addr_to_block_idx(const HeapWord* addr) const +{ + assert(addr >= _region_start, "bad addr"); + assert(addr <= _region_end, "bad addr"); + return pointer_delta(addr, _region_start) >> Log2BlockSize; +} + +inline ParallelCompactData::BlockData* +ParallelCompactData::addr_to_block_ptr(const HeapWord* addr) const +{ + return block(addr_to_block_idx(addr)); +} + +inline HeapWord* +ParallelCompactData::block_to_addr(size_t block) const +{ + assert(block < _block_count, "block out of range"); + return _region_start + (block << Log2BlockSize); +} + +inline size_t +ParallelCompactData::region_to_block_idx(size_t region) const +{ + return region << Log2BlocksPerRegion; +} + +inline HeapWord* +ParallelCompactData::block_align_down(HeapWord* addr) const +{ + assert(addr >= _region_start, "bad addr"); + assert(addr < _region_end + RegionSize, "bad addr"); + return (HeapWord*)(size_t(addr) & BlockAddrMask); +} + +inline HeapWord* +ParallelCompactData::block_align_up(HeapWord* addr) const +{ + assert(addr >= _region_start, "bad addr"); + assert(addr <= _region_end, "bad addr"); + return block_align_down(addr + BlockSizeOffsetMask); +} + +inline bool +ParallelCompactData::is_block_aligned(HeapWord* addr) const +{ + return block_offset(addr) == 0; +} + // Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the // do_addr() method. // @@ -775,6 +916,7 @@ // Convenient access to type names. typedef ParMarkBitMap::idx_t idx_t; typedef ParallelCompactData::RegionData RegionData; + typedef ParallelCompactData::BlockData BlockData; typedef enum { old_space_id, eden_space_id, @@ -962,6 +1104,8 @@ // Adjust addresses in roots. Does not adjust addresses in heap. static void adjust_roots(); + DEBUG_ONLY(static void write_block_fill_histogram(outputStream* const out);) + // Move objects to new locations. static void compact_perm(ParCompactionManager* cm); static void compact(); @@ -1128,6 +1272,9 @@ fill_region(cm, region); } + // Fill in the block table for the specified region. + static void fill_blocks(size_t region_idx); + // Update the deferred objects in the space. static void update_deferred_objects(ParCompactionManager* cm, SpaceId id); diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp Thu Jun 06 05:56:33 2013 -0700 @@ -42,7 +42,7 @@ if (o->is_forwarded()) { o = o->forwardee(); // Card mark - if (PSScavenge::is_obj_in_young((HeapWord*) o)) { + if (PSScavenge::is_obj_in_young(o)) { PSScavenge::card_table()->inline_write_ref_field_gc(p, o); } oopDesc::encode_store_heap_oop_not_null(p, o); diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Thu Jun 06 05:56:33 2013 -0700 @@ -61,6 +61,7 @@ bool PSScavenge::_survivor_overflow = false; uint PSScavenge::_tenuring_threshold = 0; HeapWord* PSScavenge::_young_generation_boundary = NULL; +uintptr_t PSScavenge::_young_generation_boundary_compressed = 0; elapsedTimer PSScavenge::_accumulated_time; Stack PSScavenge::_preserved_mark_stack; Stack PSScavenge::_preserved_oop_stack; @@ -71,7 +72,7 @@ class PSIsAliveClosure: public BoolObjectClosure { public: bool do_object_b(oop p) { - return (!PSScavenge::is_obj_in_young((HeapWord*) p)) || p->is_forwarded(); + return (!PSScavenge::is_obj_in_young(p)) || p->is_forwarded(); } }; @@ -815,7 +816,7 @@ // Set boundary between young_gen and old_gen assert(old_gen->reserved().end() <= young_gen->eden_space()->bottom(), "old above young"); - _young_generation_boundary = young_gen->eden_space()->bottom(); + set_young_generation_boundary(young_gen->eden_space()->bottom()); // Initialize ref handling object for scavenging. MemRegion mr = young_gen->reserved(); diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/parallelScavenge/psScavenge.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.hpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.hpp Thu Jun 06 05:56:33 2013 -0700 @@ -62,19 +62,22 @@ protected: // Flags/counters - static ReferenceProcessor* _ref_processor; // Reference processor for scavenging. - static PSIsAliveClosure _is_alive_closure; // Closure used for reference processing - static CardTableExtension* _card_table; // We cache the card table for fast access. - static bool _survivor_overflow; // Overflow this collection - static uint _tenuring_threshold; // tenuring threshold for next scavenge - static elapsedTimer _accumulated_time; // total time spent on scavenge - static HeapWord* _young_generation_boundary; // The lowest address possible for the young_gen. - // This is used to decide if an oop should be scavenged, - // cards should be marked, etc. + static ReferenceProcessor* _ref_processor; // Reference processor for scavenging. + static PSIsAliveClosure _is_alive_closure; // Closure used for reference processing + static CardTableExtension* _card_table; // We cache the card table for fast access. + static bool _survivor_overflow; // Overflow this collection + static uint _tenuring_threshold; // tenuring threshold for next scavenge + static elapsedTimer _accumulated_time; // total time spent on scavenge + // The lowest address possible for the young_gen. + // This is used to decide if an oop should be scavenged, + // cards should be marked, etc. + static HeapWord* _young_generation_boundary; + // Used to optimize compressed oops young gen boundary checking. + static uintptr_t _young_generation_boundary_compressed; static Stack _preserved_mark_stack; // List of marks to be restored after failed promotion static Stack _preserved_oop_stack; // List of oops that need their mark restored. - static CollectorCounters* _counters; // collector performance counters - static bool _promotion_failed; + static CollectorCounters* _counters; // collector performance counters + static bool _promotion_failed; static void clean_up_failed_promotion(); @@ -112,6 +115,9 @@ // boundary moves, _young_generation_boundary must be reset static void set_young_generation_boundary(HeapWord* v) { _young_generation_boundary = v; + if (UseCompressedOops) { + _young_generation_boundary_compressed = (uintptr_t)oopDesc::encode_heap_oop((oop)v); + } } // Called by parallelScavengeHeap to init the tenuring threshold @@ -140,11 +146,19 @@ static void copy_and_push_safe_barrier_from_klass(PSPromotionManager* pm, oop* p); // Is an object in the young generation - // This assumes that the HeapWord argument is in the heap, + // This assumes that the 'o' is in the heap, // so it only checks one side of the complete predicate. + + inline static bool is_obj_in_young(oop o) { + return (HeapWord*)o >= _young_generation_boundary; + } + + inline static bool is_obj_in_young(narrowOop o) { + return (uintptr_t)o >= _young_generation_boundary_compressed; + } + inline static bool is_obj_in_young(HeapWord* o) { - const bool result = (o >= _young_generation_boundary); - return result; + return o >= _young_generation_boundary; } }; diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp Thu Jun 06 05:56:33 2013 -0700 @@ -39,9 +39,7 @@ template inline bool PSScavenge::should_scavenge(T* p) { T heap_oop = oopDesc::load_heap_oop(p); - if (oopDesc::is_null(heap_oop)) return false; - oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); - return PSScavenge::is_obj_in_young((HeapWord*)obj); + return PSScavenge::is_obj_in_young(heap_oop); } template @@ -94,7 +92,7 @@ // or from metadata. if ((!PSScavenge::is_obj_in_young((HeapWord*)p)) && Universe::heap()->is_in_reserved(p)) { - if (PSScavenge::is_obj_in_young((HeapWord*)new_obj)) { + if (PSScavenge::is_obj_in_young(new_obj)) { card_table()->inline_write_ref_field_gc(p, new_obj); } } @@ -147,7 +145,7 @@ } oopDesc::encode_store_heap_oop_not_null(p, new_obj); - if (PSScavenge::is_obj_in_young((HeapWord*)new_obj)) { + if (PSScavenge::is_obj_in_young(new_obj)) { do_klass_barrier(); } } diff -r 6bf8b8bb7c19 -r f8c8cace25ad src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp --- a/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp Wed Jun 05 14:12:49 2013 -0400 +++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp Thu Jun 06 05:56:33 2013 -0700 @@ -158,7 +158,7 @@ // Fills in the unallocated portion of the buffer with a garbage object. // If "end_of_gc" is TRUE, is after the last use in the GC. IF "retain" // is true, attempt to re-use the unused portion in the next GC. - void retire(bool end_of_gc, bool retain); + virtual void retire(bool end_of_gc, bool retain); void print() PRODUCT_RETURN; }; diff -r 6bf8b8bb7c19 -r f8c8cace25ad test/gc/g1/TestSummarizeRSetStats.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/g1/TestSummarizeRSetStats.java Thu Jun 06 05:56:33 2013 -0700 @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test TestSummarizeRSetStats.java + * @bug 8013895 + * @library /testlibrary + * @build TestSummarizeRSetStats + * @summary Verify output of -XX:+G1SummarizeRSetStats + * @run main TestSummarizeRSetStats + * + * Test the output of G1SummarizeRSetStats in conjunction with G1SummarizeRSetStatsPeriod. + */ + +import com.oracle.java.testlibrary.*; +import java.lang.Thread; +import java.util.ArrayList; +import java.util.Arrays; + +class RunSystemGCs { + // 4M size, both are directly allocated into the old gen + static Object[] largeObject1 = new Object[1024 * 1024]; + static Object[] largeObject2 = new Object[1024 * 1024]; + + static int[] temp; + + public static void main(String[] args) { + // create some cross-references between these objects + for (int i = 0; i < largeObject1.length; i++) { + largeObject1[i] = largeObject2; + } + + for (int i = 0; i < largeObject2.length; i++) { + largeObject2[i] = largeObject1; + } + + int numGCs = Integer.parseInt(args[0]); + + if (numGCs > 0) { + // try to force a minor collection: the young gen is 4M, the + // amount of data allocated below is roughly that (4*1024*1024 + + // some header data) + for (int i = 0; i < 1024 ; i++) { + temp = new int[1024]; + } + } + + for (int i = 0; i < numGCs - 1; i++) { + System.gc(); + } + } +} + +public class TestSummarizeRSetStats { + + public static String runTest(String[] additionalArgs, int numGCs) throws Exception { + ArrayList finalargs = new ArrayList(); + String[] defaultArgs = new String[] { + "-XX:+UseG1GC", + "-Xmn4m", + "-Xmx20m", + "-XX:InitiatingHeapOccupancyPercent=100", // we don't want the additional GCs due to initial marking + "-XX:+PrintGC", + "-XX:+UnlockDiagnosticVMOptions", + "-XX:G1HeapRegionSize=1M", + }; + + finalargs.addAll(Arrays.asList(defaultArgs)); + + if (additionalArgs != null) { + finalargs.addAll(Arrays.asList(additionalArgs)); + } + + finalargs.add(RunSystemGCs.class.getName()); + finalargs.add(String.valueOf(numGCs)); + + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder( + finalargs.toArray(new String[0])); + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + + output.shouldHaveExitValue(0); + + String result = output.getStdout(); + return result; + } + + private static void expectStatistics(String result, int expectedCumulative, int expectedPeriodic) throws Exception { + int actualTotal = result.split("Concurrent RS processed").length - 1; + int actualCumulative = result.split("Cumulative RS summary").length - 1; + + if (expectedCumulative != actualCumulative) { + throw new Exception("Incorrect amount of RSet summaries at the end. Expected " + expectedCumulative + ", got " + actualCumulative); + } + + if (expectedPeriodic != (actualTotal - actualCumulative)) { + throw new Exception("Incorrect amount of per-period RSet summaries at the end. Expected " + expectedPeriodic + ", got " + (actualTotal - actualCumulative)); + } + } + + public static void main(String[] args) throws Exception { + String result; + + // no RSet statistics output + result = runTest(null, 0); + expectStatistics(result, 0, 0); + + // no RSet statistics output + result = runTest(null, 2); + expectStatistics(result, 0, 0); + + // no RSet statistics output + result = runTest(new String[] { "-XX:G1SummarizeRSetStatsPeriod=1" }, 3); + expectStatistics(result, 0, 0); + + // single RSet statistics output at the end + result = runTest(new String[] { "-XX:+G1SummarizeRSetStats" }, 0); + expectStatistics(result, 1, 0); + + // single RSet statistics output at the end + result = runTest(new String[] { "-XX:+G1SummarizeRSetStats" }, 2); + expectStatistics(result, 1, 0); + + // single RSet statistics output + result = runTest(new String[] { "-XX:+G1SummarizeRSetStats", "-XX:G1SummarizeRSetStatsPeriod=1" }, 0); + expectStatistics(result, 1, 0); + + // two times RSet statistics output + result = runTest(new String[] { "-XX:+G1SummarizeRSetStats", "-XX:G1SummarizeRSetStatsPeriod=1" }, 1); + expectStatistics(result, 1, 1); + + // four times RSet statistics output + result = runTest(new String[] { "-XX:+G1SummarizeRSetStats", "-XX:G1SummarizeRSetStatsPeriod=1" }, 3); + expectStatistics(result, 1, 3); + + // three times RSet statistics output + result = runTest(new String[] { "-XX:+G1SummarizeRSetStats", "-XX:G1SummarizeRSetStatsPeriod=2" }, 3); + expectStatistics(result, 1, 2); + + // single RSet statistics output + result = runTest(new String[] { "-XX:+G1SummarizeRSetStats", "-XX:G1SummarizeRSetStatsPeriod=100" }, 3); + expectStatistics(result, 1, 1); + } +} +