Mercurial > hg > truffle
diff src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @ 4095:bca17e38de00
6593758: RFE: Enhance GC ergonomics to dynamically choose ParallelGCThreads
Summary: Select number of GC threads dynamically based on heap usage and number of Java threads
Reviewed-by: johnc, ysr, jcoomes
author | jmasa |
---|---|
date | Tue, 09 Aug 2011 10:16:01 -0700 |
parents | a88de71c4e3a |
children | dc467e8b2c5e |
line wrap: on
line diff
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Tue Nov 22 04:47:10 2011 -0500 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Tue Aug 09 10:16:01 2011 -0700 @@ -66,6 +66,18 @@ // apply to TLAB allocation, which is not part of this interface: it // is done by clients of this interface.) +// Notes on implementation of parallelism in different tasks. +// +// G1ParVerifyTask uses heap_region_par_iterate_chunked() for parallelism. +// The number of GC workers is passed to heap_region_par_iterate_chunked(). +// It does use run_task() which sets _n_workers in the task. +// G1ParTask executes g1_process_strong_roots() -> +// SharedHeap::process_strong_roots() which calls eventuall to +// CardTableModRefBS::par_non_clean_card_iterate_work() which uses +// SequentialSubTasksDone. SharedHeap::process_strong_roots() also +// directly uses SubTasksDone (_process_strong_tasks field in SharedHeap). +// + // Local to this file. class RefineCardTableEntryClosure: public CardTableEntryClosure { @@ -1156,6 +1168,7 @@ void work(int i) { RebuildRSOutOfRegionClosure rebuild_rs(_g1, i); _g1->heap_region_par_iterate_chunked(&rebuild_rs, i, + _g1->workers()->active_workers(), HeapRegion::RebuildRSClaimValue); } }; @@ -1360,12 +1373,32 @@ } // Rebuild remembered sets of all regions. - if (G1CollectedHeap::use_parallel_gc_threads()) { + int n_workers = + AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(), + workers()->active_workers(), + Threads::number_of_non_daemon_threads()); + assert(UseDynamicNumberOfGCThreads || + n_workers == workers()->total_workers(), + "If not dynamic should be using all the workers"); + workers()->set_active_workers(n_workers); + // Set parallel threads in the heap (_n_par_threads) only + // before a parallel phase and always reset it to 0 after + // the phase so that the number of parallel threads does + // no get carried forward to a serial phase where there + // may be code that is "possibly_parallel". + set_par_threads(n_workers); + ParRebuildRSTask rebuild_rs_task(this); assert(check_heap_region_claim_values( HeapRegion::InitialClaimValue), "sanity check"); - set_par_threads(workers()->total_workers()); + assert(UseDynamicNumberOfGCThreads || + workers()->active_workers() == workers()->total_workers(), + "Unless dynamic should use total workers"); + // Use the most recent number of active workers + assert(workers()->active_workers() > 0, + "Active workers not properly set"); + set_par_threads(workers()->active_workers()); workers()->run_task(&rebuild_rs_task); set_par_threads(0); assert(check_heap_region_claim_values( @@ -2477,11 +2510,17 @@ void G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl, int worker, + int no_of_par_workers, jint claim_value) { const size_t regions = n_regions(); - const size_t worker_num = (G1CollectedHeap::use_parallel_gc_threads() ? ParallelGCThreads : 1); + const size_t max_workers = (G1CollectedHeap::use_parallel_gc_threads() ? + no_of_par_workers : + 1); + assert(UseDynamicNumberOfGCThreads || + no_of_par_workers == workers()->total_workers(), + "Non dynamic should use fixed number of workers"); // try to spread out the starting points of the workers - const size_t start_index = regions / worker_num * (size_t) worker; + const size_t start_index = regions / max_workers * (size_t) worker; // each worker will actually look at all regions for (size_t count = 0; count < regions; ++count) { @@ -2920,6 +2959,7 @@ HandleMark hm; VerifyRegionClosure blk(_allow_dirty, true, _vo); _g1h->heap_region_par_iterate_chunked(&blk, worker_i, + _g1h->workers()->active_workers(), HeapRegion::ParVerifyClaimValue); if (blk.failures()) { _failures = true; @@ -2937,6 +2977,10 @@ if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) { if (!silent) { gclog_or_tty->print("Roots (excluding permgen) "); } VerifyRootsClosure rootsCl(vo); + + assert(Thread::current()->is_VM_thread(), + "Expected to be executed serially by the VM thread at this point"); + CodeBlobToOopClosure blobsCl(&rootsCl, /*do_marking=*/ false); // We apply the relevant closures to all the oops in the @@ -2981,7 +3025,10 @@ "sanity check"); G1ParVerifyTask task(this, allow_dirty, vo); - int n_workers = workers()->total_workers(); + assert(UseDynamicNumberOfGCThreads || + workers()->active_workers() == workers()->total_workers(), + "If not dynamic should be using all the workers"); + int n_workers = workers()->active_workers(); set_par_threads(n_workers); workers()->run_task(&task); set_par_threads(0); @@ -2989,6 +3036,8 @@ failures = true; } + // Checks that the expected amount of parallel work was done. + // The implication is that n_workers is > 0. assert(check_heap_region_claim_values(HeapRegion::ParVerifyClaimValue), "sanity check"); @@ -3402,6 +3451,10 @@ assert(check_young_list_well_formed(), "young list should be well formed"); + // Don't dynamically change the number of GC threads this early. A value of + // 0 is used to indicate serial work. When parallel work is done, + // it will be set. + { // Call to jvmpi::post_class_unload_events must occur outside of active GC IsGCActiveMark x; @@ -3615,7 +3668,8 @@ double end_time_sec = os::elapsedTime(); double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS; g1_policy()->record_pause_time_ms(pause_time_ms); - g1_policy()->record_collection_pause_end(); + int active_gc_threads = workers()->active_workers(); + g1_policy()->record_collection_pause_end(active_gc_threads); MemoryService::track_memory_usage(); @@ -4562,13 +4616,13 @@ } public: - G1ParTask(G1CollectedHeap* g1h, int workers, RefToScanQueueSet *task_queues) + G1ParTask(G1CollectedHeap* g1h, + RefToScanQueueSet *task_queues) : AbstractGangTask("G1 collection"), _g1h(g1h), _queues(task_queues), - _terminator(workers, _queues), - _stats_lock(Mutex::leaf, "parallel G1 stats lock", true), - _n_workers(workers) + _terminator(0, _queues), + _stats_lock(Mutex::leaf, "parallel G1 stats lock", true) {} RefToScanQueueSet* queues() { return _queues; } @@ -4577,6 +4631,20 @@ return queues()->queue(i); } + ParallelTaskTerminator* terminator() { return &_terminator; } + + virtual void set_for_termination(int active_workers) { + // This task calls set_n_termination() in par_non_clean_card_iterate_work() + // in the young space (_par_seq_tasks) in the G1 heap + // for SequentialSubTasksDone. + // This task also uses SubTasksDone in SharedHeap and G1CollectedHeap + // both of which need setting by set_n_termination(). + _g1h->SharedHeap::set_n_termination(active_workers); + _g1h->set_n_termination(active_workers); + terminator()->reset_for_reuse(active_workers); + _n_workers = active_workers; + } + void work(int i) { if (i >= _n_workers) return; // no work needed this round @@ -4861,12 +4929,12 @@ private: G1CollectedHeap* _g1h; RefToScanQueueSet* _queues; - WorkGang* _workers; + FlexibleWorkGang* _workers; int _active_workers; public: G1STWRefProcTaskExecutor(G1CollectedHeap* g1h, - WorkGang* workers, + FlexibleWorkGang* workers, RefToScanQueueSet *task_queues, int n_workers) : _g1h(g1h), @@ -5122,11 +5190,13 @@ // referents points to another object which is also referenced by an // object discovered by the STW ref processor. - int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? - workers()->total_workers() : 1); - - set_par_threads(n_workers); - G1ParPreserveCMReferentsTask keep_cm_referents(this, n_workers, _task_queues); + int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ? + workers()->active_workers() : 1); + + assert(active_workers == workers()->active_workers(), + "Need to reset active_workers"); + set_par_threads(active_workers); + G1ParPreserveCMReferentsTask keep_cm_referents(this, active_workers, _task_queues); if (G1CollectedHeap::use_parallel_gc_threads()) { workers()->run_task(&keep_cm_referents); @@ -5192,7 +5262,6 @@ NULL); } else { // Parallel reference processing - int active_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); assert(rp->num_q() == active_workers, "sanity"); assert(active_workers <= rp->max_num_q(), "sanity"); @@ -5225,7 +5294,9 @@ } else { // Parallel reference enqueuing - int active_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); + int active_workers = (ParallelGCThreads > 0 ? workers()->active_workers() : 1); + assert(active_workers == workers()->active_workers(), + "Need to reset active_workers"); assert(rp->num_q() == active_workers, "sanity"); assert(active_workers <= rp->max_num_q(), "sanity"); @@ -5252,9 +5323,24 @@ concurrent_g1_refine()->set_use_cache(false); concurrent_g1_refine()->clear_hot_cache_claimed_index(); - int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); - set_par_threads(n_workers); - G1ParTask g1_par_task(this, n_workers, _task_queues); + int n_workers; + if (G1CollectedHeap::use_parallel_gc_threads()) { + n_workers = + AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(), + workers()->active_workers(), + Threads::number_of_non_daemon_threads()); + assert(UseDynamicNumberOfGCThreads || + n_workers == workers()->total_workers(), + "If not dynamic should be using all the workers"); + set_par_threads(n_workers); + } else { + assert(n_par_threads() == 0, + "Should be the original non-parallel value"); + n_workers = 1; + } + workers()->set_active_workers(n_workers); + + G1ParTask g1_par_task(this, _task_queues); init_for_evac_failure(NULL); @@ -5267,6 +5353,10 @@ // The individual threads will set their evac-failure closures. StrongRootsScope srs(this); if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr(); + // These tasks use ShareHeap::_process_strong_tasks + assert(UseDynamicNumberOfGCThreads || + workers()->active_workers() == workers()->total_workers(), + "If not dynamic should be using all the workers"); workers()->run_task(&g1_par_task); } else { StrongRootsScope srs(this); @@ -5275,6 +5365,7 @@ double par_time = (os::elapsedTime() - start_par) * 1000.0; g1_policy()->record_par_time(par_time); + set_par_threads(0); // Process any discovered reference objects - we have @@ -5905,6 +5996,21 @@ return _g1h->new_mutator_alloc_region(word_size, force); } +void G1CollectedHeap::set_par_threads() { + // Don't change the number of workers. Use the value previously set + // in the workgroup. + int n_workers = workers()->active_workers(); + assert(UseDynamicNumberOfGCThreads || + n_workers == workers()->total_workers(), + "Otherwise should be using the total number of workers"); + if (n_workers == 0) { + assert(false, "Should have been set in prior evacuation pause."); + n_workers = ParallelGCThreads; + workers()->set_active_workers(n_workers); + } + set_par_threads(n_workers); +} + void MutatorAllocRegion::retire_region(HeapRegion* alloc_region, size_t allocated_bytes) { _g1h->retire_mutator_alloc_region(alloc_region, allocated_bytes);