comparison src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @ 4095:bca17e38de00

6593758: RFE: Enhance GC ergonomics to dynamically choose ParallelGCThreads
Summary: Select number of GC threads dynamically based on heap usage and number of Java threads
Reviewed-by: johnc, ysr, jcoomes
author jmasa
date Tue, 09 Aug 2011 10:16:01 -0700
parents a88de71c4e3a
children dc467e8b2c5e
compared revisions: 4094:3a298e04d914 (old, "-" lines) vs. 4095:bca17e38de00 (new, "+" lines)
@@ -63 +63 @@
 // serialized by acquiring the HeapLock. This happens in mem_allocate
 // and allocate_new_tlab, which are the "entry" points to the
 // allocation code from the rest of the JVM. (Note that this does not
 // apply to TLAB allocation, which is not part of this interface: it
 // is done by clients of this interface.)
+
+// Notes on implementation of parallelism in different tasks.
+//
+// G1ParVerifyTask uses heap_region_par_iterate_chunked() for parallelism.
+// The number of GC workers is passed to heap_region_par_iterate_chunked().
+// It does use run_task() which sets _n_workers in the task.
+// G1ParTask executes g1_process_strong_roots() ->
+// SharedHeap::process_strong_roots() which eventually calls into
+// CardTableModRefBS::par_non_clean_card_iterate_work() which uses
+// SequentialSubTasksDone. SharedHeap::process_strong_roots() also
+// directly uses SubTasksDone (_process_strong_tasks field in SharedHeap).
+//

 // Local to this file.

 class RefineCardTableEntryClosure: public CardTableEntryClosure {
   SuspendibleThreadSet* _sts;
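The added comment block above names two work-splitting mechanisms, SubTasksDone and SequentialSubTasksDone. As a rough illustration of the difference, here is a minimal standalone sketch with hypothetical class names (SubTasksDoneSketch, SequentialSubTasksDoneSketch, try_claim_task), not the HotSpot implementations: the first lets any worker claim a specific, named subtask exactly once, the second hands out the next unclaimed chunk to whichever worker asks.

    // Toy versions of the two claiming styles; names and APIs are invented.
    #include <atomic>
    #include <cstdio>

    // "Named subtask" style: the first caller to try_claim_task(t) gets subtask t.
    class SubTasksDoneSketch {
      std::atomic<unsigned> _claimed[8];
    public:
      SubTasksDoneSketch() { for (auto& c : _claimed) c = 0; }
      bool try_claim_task(int t) {
        return _claimed[t].exchange(1) == 0;   // true only for the first claimant
      }
    };

    // "Next chunk" style: workers pull increasing indices until none remain.
    class SequentialSubTasksDoneSketch {
      std::atomic<int> _next{0};
      int _n_tasks;
    public:
      explicit SequentialSubTasksDoneSketch(int n) : _n_tasks(n) {}
      bool try_claim_task(int* t) {
        int i = _next.fetch_add(1);
        if (i >= _n_tasks) return false;
        *t = i;
        return true;
      }
    };

    int main() {
      SubTasksDoneSketch named;
      if (named.try_claim_task(3))  std::printf("this worker does subtask 3\n");
      if (!named.try_claim_task(3)) std::printf("subtask 3 already claimed\n");

      SequentialSubTasksDoneSketch seq(4);
      for (int t; seq.try_claim_task(&t); ) std::printf("claimed chunk %d\n", t);
      return 0;
    }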
@@ -1154 +1166 @@
   { }

   void work(int i) {
     RebuildRSOutOfRegionClosure rebuild_rs(_g1, i);
     _g1->heap_region_par_iterate_chunked(&rebuild_rs, i,
+        _g1->workers()->active_workers(),
         HeapRegion::RebuildRSClaimValue);
   }
 };

 class PostCompactionPrinterClosure: public HeapRegionClosure {
@@ -1358 +1371 @@
     _cg1r->clear_and_record_card_counts();
     _cg1r->clear_hot_cache();
   }

   // Rebuild remembered sets of all regions.
-
   if (G1CollectedHeap::use_parallel_gc_threads()) {
+    int n_workers =
+        AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
+                                                workers()->active_workers(),
+                                                Threads::number_of_non_daemon_threads());
+    assert(UseDynamicNumberOfGCThreads ||
+           n_workers == workers()->total_workers(),
+           "If not dynamic should be using all the workers");
+    workers()->set_active_workers(n_workers);
+    // Set parallel threads in the heap (_n_par_threads) only
+    // before a parallel phase and always reset it to 0 after
+    // the phase so that the number of parallel threads does
+    // not get carried forward to a serial phase where there
+    // may be code that is "possibly_parallel".
+    set_par_threads(n_workers);
+
     ParRebuildRSTask rebuild_rs_task(this);
     assert(check_heap_region_claim_values(
            HeapRegion::InitialClaimValue), "sanity check");
-    set_par_threads(workers()->total_workers());
+    assert(UseDynamicNumberOfGCThreads ||
+           workers()->active_workers() == workers()->total_workers(),
+           "Unless dynamic should use total workers");
+    // Use the most recent number of active workers
+    assert(workers()->active_workers() > 0,
+           "Active workers not properly set");
+    set_par_threads(workers()->active_workers());
     workers()->run_task(&rebuild_rs_task);
     set_par_threads(0);
     assert(check_heap_region_claim_values(
            HeapRegion::RebuildRSClaimValue), "sanity check");
     reset_heap_region_claim_values();
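The hunk above shows the pattern this changeset applies throughout: compute an active worker count with AdaptiveSizePolicy::calc_active_workers(), record it in the work gang, and set the heap's parallel thread count only for the duration of the parallel phase, resetting it to 0 afterwards. The sketch below models that bracketing with hypothetical names (WorkerGang, choose_active_workers, par_threads, run_parallel_phase); the real sizing heuristic lives in AdaptiveSizePolicy and is not reproduced here.

    #include <algorithm>
    #include <cstdio>

    struct WorkerGang {
      int total_workers;   // fixed at startup (ParallelGCThreads analogue)
      int active_workers;  // varies per pause when dynamic sizing is enabled
    };

    // Pick an active worker count no larger than the gang, loosely driven by how
    // many application (non-daemon) threads exist. A stand-in heuristic only,
    // not AdaptiveSizePolicy::calc_active_workers().
    static int choose_active_workers(int total, int prev_active, int non_daemon_threads) {
      int wanted = std::max(prev_active, 2 * non_daemon_threads);
      return std::max(1, std::min(total, wanted));
    }

    static int par_threads = 0;  // analogue of the heap's _n_par_threads

    static void run_parallel_phase(WorkerGang* gang, int non_daemon_threads) {
      gang->active_workers =
          choose_active_workers(gang->total_workers, gang->active_workers,
                                non_daemon_threads);
      par_threads = gang->active_workers;   // set only for the parallel phase...
      std::printf("running phase with %d of %d workers\n",
                  par_threads, gang->total_workers);
      // ... worker gang runs the task here ...
      par_threads = 0;                      // ...and always reset afterwards, so a
                                            // later serial phase never sees a stale
                                            // parallel thread count.
    }

    int main() {
      WorkerGang gang = {8, 1};
      run_parallel_phase(&gang, 3);   // few mutator threads -> fewer active workers
      run_parallel_phase(&gang, 40);  // many mutator threads -> capped at the gang size
      return 0;
    }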
@@ -2475 +2508 @@
 }

 void
 G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl,
                                                  int worker,
+                                                 int no_of_par_workers,
                                                  jint claim_value) {
   const size_t regions = n_regions();
-  const size_t worker_num = (G1CollectedHeap::use_parallel_gc_threads() ? ParallelGCThreads : 1);
+  const size_t max_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                              no_of_par_workers :
+                              1);
+  assert(UseDynamicNumberOfGCThreads ||
+         no_of_par_workers == workers()->total_workers(),
+         "Non dynamic should use fixed number of workers");
   // try to spread out the starting points of the workers
-  const size_t start_index = regions / worker_num * (size_t) worker;
+  const size_t start_index = regions / max_workers * (size_t) worker;

   // each worker will actually look at all regions
   for (size_t count = 0; count < regions; ++count) {
     const size_t index = (start_index + count) % regions;
     assert(0 <= index && index < regions, "sanity");
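heap_region_par_iterate_chunked() now takes the number of parallel workers so the starting offsets can be spread across the currently active workers rather than a fixed ParallelGCThreads. A minimal standalone sketch of that iteration shape, with hypothetical types and a plain boolean claim in place of G1's claim values:

    #include <atomic>
    #include <cstdio>
    #include <thread>
    #include <vector>

    int main() {
      const size_t regions = 10;
      const size_t workers = 3;
      std::vector<std::atomic<bool>> claimed(regions);
      for (auto& c : claimed) c = false;

      auto work = [&](size_t worker) {
        // Spread out the starting points, as in heap_region_par_iterate_chunked().
        const size_t start = regions / workers * worker;
        // Each worker walks all regions, but a per-region claim ensures each
        // region is processed exactly once across the gang.
        for (size_t count = 0; count < regions; ++count) {
          const size_t index = (start + count) % regions;
          bool expected = false;
          if (claimed[index].compare_exchange_strong(expected, true)) {
            std::printf("worker %zu processes region %zu\n", worker, index);
          }
        }
      };

      std::vector<std::thread> gang;
      for (size_t w = 0; w < workers; ++w) gang.emplace_back(work, w);
      for (auto& t : gang) t.join();
      return 0;
    }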
@@ -2918 +2957 @@

   void work(int worker_i) {
     HandleMark hm;
     VerifyRegionClosure blk(_allow_dirty, true, _vo);
     _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
+        _g1h->workers()->active_workers(),
         HeapRegion::ParVerifyClaimValue);
     if (blk.failures()) {
       _failures = true;
     }
   }
@@ -2935 +2975 @@
                                bool silent,
                                VerifyOption vo) {
   if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) {
     if (!silent) { gclog_or_tty->print("Roots (excluding permgen) "); }
     VerifyRootsClosure rootsCl(vo);
+
+    assert(Thread::current()->is_VM_thread(),
+           "Expected to be executed serially by the VM thread at this point");
+
     CodeBlobToOopClosure blobsCl(&rootsCl, /*do_marking=*/ false);

     // We apply the relevant closures to all the oops in the
     // system dictionary, the string table and the code cache.
     const int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
@@ -2979 +3023 @@
   if (GCParallelVerificationEnabled && ParallelGCThreads > 1) {
     assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue),
            "sanity check");

     G1ParVerifyTask task(this, allow_dirty, vo);
-    int n_workers = workers()->total_workers();
+    assert(UseDynamicNumberOfGCThreads ||
+           workers()->active_workers() == workers()->total_workers(),
+           "If not dynamic should be using all the workers");
+    int n_workers = workers()->active_workers();
     set_par_threads(n_workers);
     workers()->run_task(&task);
     set_par_threads(0);
     if (task.failures()) {
       failures = true;
     }

+    // Checks that the expected amount of parallel work was done.
+    // The implication is that n_workers is > 0.
     assert(check_heap_region_claim_values(HeapRegion::ParVerifyClaimValue),
            "sanity check");

     reset_heap_region_claim_values();
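The verification code relies on region claim values to prove that the parallel phase covered every region: regions start at InitialClaimValue, each worker claims regions by flipping them to ParVerifyClaimValue, and the asserts before and after check the claim state, which is then reset for the next user. A small sketch of that bookkeeping, using invented names (Region, claim_region, check_claim_values) rather than the HeapRegion API:

    #include <atomic>
    #include <cassert>
    #include <vector>

    enum ClaimValue { InitialClaimValue = 0, ParVerifyClaimValue = 1 };

    struct Region { std::atomic<int> claim{InitialClaimValue}; };

    // A worker claims a region by moving it from the initial value to the
    // phase-specific value; only the claiming worker processes it.
    bool claim_region(Region& r, int phase_value) {
      int expected = InitialClaimValue;
      return r.claim.compare_exchange_strong(expected, phase_value);
    }

    bool check_claim_values(const std::vector<Region>& rs, int expected) {
      for (const auto& r : rs) if (r.claim.load() != expected) return false;
      return true;
    }

    void reset_claim_values(std::vector<Region>& rs) {
      for (auto& r : rs) r.claim.store(InitialClaimValue);
    }

    int main() {
      std::vector<Region> regions(16);
      assert(check_claim_values(regions, InitialClaimValue));          // "sanity check"
      for (auto& r : regions) claim_region(r, ParVerifyClaimValue);    // parallel phase
      // If any worker had skipped regions, this check would fail.
      assert(check_claim_values(regions, ParVerifyClaimValue));
      reset_claim_values(regions);
      return 0;
    }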
@@ -3400 +3449 @@
   }

   assert(check_young_list_well_formed(),
          "young list should be well formed");

+  // Don't dynamically change the number of GC threads this early. A value of
+  // 0 is used to indicate serial work. When parallel work is done,
+  // it will be set.
+
   { // Call to jvmpi::post_class_unload_events must occur outside of active GC
     IsGCActiveMark x;

     gc_prologue(false);
     increment_total_collections(false /* full gc */);
@@ -3613 +3666 @@
   }

   double end_time_sec = os::elapsedTime();
   double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
   g1_policy()->record_pause_time_ms(pause_time_ms);
-  g1_policy()->record_collection_pause_end();
+  int active_gc_threads = workers()->active_workers();
+  g1_policy()->record_collection_pause_end(active_gc_threads);

   MemoryService::track_memory_usage();

   // In prepare_for_verify() below we'll need to scan the deferred
   // update buffers to bring the RSets up-to-date if
@@ -4560 +4614 @@
     return (_g1h->capacity() + G1BlockOffsetSharedArray::N_bytes - 1)
             / G1BlockOffsetSharedArray::N_bytes;
   }

 public:
-  G1ParTask(G1CollectedHeap* g1h, int workers, RefToScanQueueSet *task_queues)
+  G1ParTask(G1CollectedHeap* g1h,
+            RefToScanQueueSet *task_queues)
     : AbstractGangTask("G1 collection"),
       _g1h(g1h),
       _queues(task_queues),
-      _terminator(workers, _queues),
-      _stats_lock(Mutex::leaf, "parallel G1 stats lock", true),
-      _n_workers(workers)
+      _terminator(0, _queues),
+      _stats_lock(Mutex::leaf, "parallel G1 stats lock", true)
   {}

   RefToScanQueueSet* queues() { return _queues; }

   RefToScanQueue *work_queue(int i) {
     return queues()->queue(i);
+  }
+
+  ParallelTaskTerminator* terminator() { return &_terminator; }
+
+  virtual void set_for_termination(int active_workers) {
+    // This task calls set_n_termination() in par_non_clean_card_iterate_work()
+    // in the young space (_par_seq_tasks) in the G1 heap
+    // for SequentialSubTasksDone.
+    // This task also uses SubTasksDone in SharedHeap and G1CollectedHeap
+    // both of which need setting by set_n_termination().
+    _g1h->SharedHeap::set_n_termination(active_workers);
+    _g1h->set_n_termination(active_workers);
+    terminator()->reset_for_reuse(active_workers);
+    _n_workers = active_workers;
   }

   void work(int i) {
     if (i >= _n_workers) return; // no work needed this round
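G1ParTask no longer receives a worker count in its constructor; the gang supplies it through set_for_termination() right before the task runs, and the task resizes its terminator and records the count so that work(i) ignores surplus worker ids. A compact sketch of that protocol, with stand-in classes (Terminator, GangTask, run_task) that are not the HotSpot WorkGang/AbstractGangTask API:

    #include <cstdio>

    class Terminator {
      int _n;
    public:
      Terminator() : _n(0) {}
      void reset_for_reuse(int active) { _n = active; }   // resize for this run
      int n() const { return _n; }
    };

    class GangTask {
      Terminator _terminator;
      int _n_workers;
    public:
      GangTask() : _n_workers(0) {}                       // count not known yet
      void set_for_termination(int active_workers) {      // called by the gang
        _terminator.reset_for_reuse(active_workers);      // just before dispatch
        _n_workers = active_workers;
      }
      void work(int i) {
        if (i >= _n_workers) return;                      // no work for this id
        std::printf("worker %d of %d running\n", i, _n_workers);
      }
    };

    // A gang with a fixed total but a per-run active count.
    void run_task(GangTask& task, int total_workers, int active_workers) {
      task.set_for_termination(active_workers);
      for (int i = 0; i < total_workers; ++i) task.work(i);  // surplus ids return at once
    }

    int main() {
      GangTask task;
      run_task(task, 8, 3);   // only 3 of the 8 worker ids do any work this round
      return 0;
    }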
@@ -4859 +4927 @@

 class G1STWRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
 private:
   G1CollectedHeap* _g1h;
   RefToScanQueueSet* _queues;
-  WorkGang* _workers;
+  FlexibleWorkGang* _workers;
   int _active_workers;

 public:
   G1STWRefProcTaskExecutor(G1CollectedHeap* g1h,
-                           WorkGang* workers,
+                           FlexibleWorkGang* workers,
                            RefToScanQueueSet *task_queues,
                            int n_workers) :
     _g1h(g1h),
     _queues(task_queues),
     _workers(workers),
@@ -5120 +5188 @@
   // We also need to do this copying before we process the reference
   // objects discovered by the STW ref processor in case one of these
   // referents points to another object which is also referenced by an
   // object discovered by the STW ref processor.

-  int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
-                   workers()->total_workers() : 1);
+  int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                        workers()->active_workers() : 1);

-  set_par_threads(n_workers);
-  G1ParPreserveCMReferentsTask keep_cm_referents(this, n_workers, _task_queues);
+  assert(active_workers == workers()->active_workers(),
+         "Need to reset active_workers");
+  set_par_threads(active_workers);
+  G1ParPreserveCMReferentsTask keep_cm_referents(this, active_workers, _task_queues);

   if (G1CollectedHeap::use_parallel_gc_threads()) {
     workers()->run_task(&keep_cm_referents);
   } else {
     keep_cm_referents.work(0);
@@ -5190 +5260 @@
                                       &keep_alive,
                                       &drain_queue,
                                       NULL);
   } else {
     // Parallel reference processing
-    int active_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
     assert(rp->num_q() == active_workers, "sanity");
     assert(active_workers <= rp->max_num_q(), "sanity");

     G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, active_workers);
     rp->process_discovered_references(&is_alive, &keep_alive, &drain_queue, &par_task_executor);
@@ -5223 +5292 @@
     // Serial reference processing...
     rp->enqueue_discovered_references();
   } else {
     // Parallel reference enqueuing

-    int active_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
+    int active_workers = (ParallelGCThreads > 0 ? workers()->active_workers() : 1);
+    assert(active_workers == workers()->active_workers(),
+           "Need to reset active_workers");
     assert(rp->num_q() == active_workers, "sanity");
     assert(active_workers <= rp->max_num_q(), "sanity");

     G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, active_workers);
     rp->enqueue_discovered_references(&par_task_executor);
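The asserts in the two reference-processing hunks above compare the executor's active worker count with the reference processor's queue count, because in the parallel case each worker drains one discovered-reference queue, and the count can never exceed the number of queues allocated up front. A toy sketch of that invariant, using invented miniature types (RefProcessorSketch, RefProcExecutorSketch) rather than the real ReferenceProcessor:

    #include <cassert>
    #include <cstdio>
    #include <vector>

    struct RefProcessorSketch {
      int _max_num_q;                 // queues allocated at startup
      int _num_q;                     // queues actually used this cycle
      std::vector<std::vector<int>> _discovered;
      explicit RefProcessorSketch(int max_q)
          : _max_num_q(max_q), _num_q(max_q), _discovered(max_q) {}
      int num_q() const { return _num_q; }
      int max_num_q() const { return _max_num_q; }
      void set_active_degree(int n) { _num_q = n; }   // shrink for this cycle
    };

    struct RefProcExecutorSketch {
      int _active_workers;
      void execute(RefProcessorSketch& rp) {
        assert(rp.num_q() == _active_workers);        // the "sanity" asserts
        assert(_active_workers <= rp.max_num_q());
        for (int i = 0; i < _active_workers; ++i) {   // worker i drains queue i
          std::printf("worker %d drains %zu refs\n", i, rp._discovered[i].size());
        }
      }
    };

    int main() {
      RefProcessorSketch rp(8);       // built for up to 8 workers
      int active_workers = 5;         // this pause only uses 5
      rp.set_active_degree(active_workers);
      RefProcExecutorSketch exec{active_workers};
      exec.execute(rp);
      return 0;
    }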
@@ -5250 +5321 @@

   g1_rem_set()->prepare_for_oops_into_collection_set_do();
   concurrent_g1_refine()->set_use_cache(false);
   concurrent_g1_refine()->clear_hot_cache_claimed_index();

-  int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
-  set_par_threads(n_workers);
-  G1ParTask g1_par_task(this, n_workers, _task_queues);
+  int n_workers;
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    n_workers =
+        AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
+                                                workers()->active_workers(),
+                                                Threads::number_of_non_daemon_threads());
+    assert(UseDynamicNumberOfGCThreads ||
+           n_workers == workers()->total_workers(),
+           "If not dynamic should be using all the workers");
+    set_par_threads(n_workers);
+  } else {
+    assert(n_par_threads() == 0,
+           "Should be the original non-parallel value");
+    n_workers = 1;
+  }
+  workers()->set_active_workers(n_workers);
+
+  G1ParTask g1_par_task(this, _task_queues);

   init_for_evac_failure(NULL);

   rem_set()->prepare_for_younger_refs_iterate(true);

@@ -5265 +5351 @@

   if (G1CollectedHeap::use_parallel_gc_threads()) {
     // The individual threads will set their evac-failure closures.
     StrongRootsScope srs(this);
     if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
+    // These tasks use SharedHeap::_process_strong_tasks
+    assert(UseDynamicNumberOfGCThreads ||
+           workers()->active_workers() == workers()->total_workers(),
+           "If not dynamic should be using all the workers");
     workers()->run_task(&g1_par_task);
   } else {
     StrongRootsScope srs(this);
     g1_par_task.work(0);
   }

   double par_time = (os::elapsedTime() - start_par) * 1000.0;
   g1_policy()->record_par_time(par_time);
+
   set_par_threads(0);

   // Process any discovered reference objects - we have
   // to do this _before_ we retire the GC alloc regions
   // as we may have to copy some 'reachable' referent
@@ -5901 +5992 @@
 }

 HeapRegion* MutatorAllocRegion::allocate_new_region(size_t word_size,
                                                     bool force) {
   return _g1h->new_mutator_alloc_region(word_size, force);
+}
+
+void G1CollectedHeap::set_par_threads() {
+  // Don't change the number of workers. Use the value previously set
+  // in the workgroup.
+  int n_workers = workers()->active_workers();
+  assert(UseDynamicNumberOfGCThreads ||
+         n_workers == workers()->total_workers(),
+         "Otherwise should be using the total number of workers");
+  if (n_workers == 0) {
+    assert(false, "Should have been set in prior evacuation pause.");
+    n_workers = ParallelGCThreads;
+    workers()->set_active_workers(n_workers);
+  }
+  set_par_threads(n_workers);
 }

 void MutatorAllocRegion::retire_region(HeapRegion* alloc_region,
                                        size_t allocated_bytes) {
   _g1h->retire_mutator_alloc_region(alloc_region, allocated_bytes);