comparison src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @ 4095:bca17e38de00

6593758: RFE: Enhance GC ergonomics to dynamically choose ParallelGCThreads
Summary: Select number of GC threads dynamically based on heap usage and number of Java threads
Reviewed-by: johnc, ysr, jcoomes
author jmasa
date Tue, 09 Aug 2011 10:16:01 -0700
parents a88de71c4e3a
children dc467e8b2c5e
compared revisions: 4094:3a298e04d914 (old, "-" lines) vs. 4095:bca17e38de00 (new, "+" lines)
@@ -63 +63 @@
 // serialized by acquiring the HeapLock. This happens in mem_allocate
 // and allocate_new_tlab, which are the "entry" points to the
 // allocation code from the rest of the JVM. (Note that this does not
 // apply to TLAB allocation, which is not part of this interface: it
 // is done by clients of this interface.)
+
+// Notes on implementation of parallelism in different tasks.
+//
+// G1ParVerifyTask uses heap_region_par_iterate_chunked() for parallelism.
+// The number of GC workers is passed to heap_region_par_iterate_chunked().
+// It does use run_task() which sets _n_workers in the task.
+// G1ParTask executes g1_process_strong_roots() ->
+// SharedHeap::process_strong_roots() which eventually calls into
+// CardTableModRefBS::par_non_clean_card_iterate_work() which uses
+// SequentialSubTasksDone. SharedHeap::process_strong_roots() also
+// directly uses SubTasksDone (_process_strong_tasks field in SharedHeap).
+//

 // Local to this file.

 class RefineCardTableEntryClosure: public CardTableEntryClosure {
   SuspendibleThreadSet* _sts;
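The added comment block above names two work-splitting mechanisms, SubTasksDone and SequentialSubTasksDone. As a rough illustration of the difference, here is a minimal standalone sketch with hypothetical class names (SubTasksDoneSketch, SequentialSubTasksDoneSketch, try_claim_task), not the HotSpot implementations: the first lets any worker claim a specific, named subtask exactly once, the second hands out the next unclaimed chunk to whichever worker asks.

    // Toy versions of the two claiming styles; names and APIs are invented.
    #include <atomic>
    #include <cstdio>

    // "Named subtask" style: the first caller to try_claim_task(t) gets subtask t.
    class SubTasksDoneSketch {
      std::atomic<unsigned> _claimed[8];
    public:
      SubTasksDoneSketch() { for (auto& c : _claimed) c = 0; }
      bool try_claim_task(int t) {
        return _claimed[t].exchange(1) == 0;   // true only for the first claimant
      }
    };

    // "Next chunk" style: workers pull increasing indices until none remain.
    class SequentialSubTasksDoneSketch {
      std::atomic<int> _next{0};
      int _n_tasks;
    public:
      explicit SequentialSubTasksDoneSketch(int n) : _n_tasks(n) {}
      bool try_claim_task(int* t) {
        int i = _next.fetch_add(1);
        if (i >= _n_tasks) return false;
        *t = i;
        return true;
      }
    };

    int main() {
      SubTasksDoneSketch named;
      if (named.try_claim_task(3))  std::printf("this worker does subtask 3\n");
      if (!named.try_claim_task(3)) std::printf("subtask 3 already claimed\n");

      SequentialSubTasksDoneSketch seq(4);
      for (int t; seq.try_claim_task(&t); ) std::printf("claimed chunk %d\n", t);
      return 0;
    }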
@@ -1154 +1166 @@
   { }

   void work(int i) {
     RebuildRSOutOfRegionClosure rebuild_rs(_g1, i);
     _g1->heap_region_par_iterate_chunked(&rebuild_rs, i,
+        _g1->workers()->active_workers(),
         HeapRegion::RebuildRSClaimValue);
   }
 };

 class PostCompactionPrinterClosure: public HeapRegionClosure {
@@ -1358 +1371 @@
     _cg1r->clear_and_record_card_counts();
     _cg1r->clear_hot_cache();
   }

   // Rebuild remembered sets of all regions.
-
   if (G1CollectedHeap::use_parallel_gc_threads()) {
+    int n_workers =
+        AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
+                                                workers()->active_workers(),
+                                                Threads::number_of_non_daemon_threads());
+    assert(UseDynamicNumberOfGCThreads ||
+           n_workers == workers()->total_workers(),
+           "If not dynamic should be using all the workers");
+    workers()->set_active_workers(n_workers);
+    // Set parallel threads in the heap (_n_par_threads) only
+    // before a parallel phase and always reset it to 0 after
+    // the phase so that the number of parallel threads does
+    // not get carried forward to a serial phase where there
+    // may be code that is "possibly_parallel".
+    set_par_threads(n_workers);
+
     ParRebuildRSTask rebuild_rs_task(this);
     assert(check_heap_region_claim_values(
            HeapRegion::InitialClaimValue), "sanity check");
-    set_par_threads(workers()->total_workers());
+    assert(UseDynamicNumberOfGCThreads ||
+           workers()->active_workers() == workers()->total_workers(),
+           "Unless dynamic should use total workers");
+    // Use the most recent number of active workers
+    assert(workers()->active_workers() > 0,
+           "Active workers not properly set");
+    set_par_threads(workers()->active_workers());
     workers()->run_task(&rebuild_rs_task);
     set_par_threads(0);
     assert(check_heap_region_claim_values(
            HeapRegion::RebuildRSClaimValue), "sanity check");
     reset_heap_region_claim_values();
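The hunk above shows the pattern this changeset applies throughout: compute an active worker count with AdaptiveSizePolicy::calc_active_workers(), record it in the work gang, and set the heap's parallel thread count only for the duration of the parallel phase, resetting it to 0 afterwards. The sketch below models that bracketing with hypothetical names (WorkerGang, choose_active_workers, par_threads, run_parallel_phase); the real sizing heuristic lives in AdaptiveSizePolicy and is not reproduced here.

    #include <algorithm>
    #include <cstdio>

    struct WorkerGang {
      int total_workers;   // fixed at startup (ParallelGCThreads analogue)
      int active_workers;  // varies per pause when dynamic sizing is enabled
    };

    // Pick an active worker count no larger than the gang, loosely driven by how
    // many application (non-daemon) threads exist. A stand-in heuristic only,
    // not AdaptiveSizePolicy::calc_active_workers().
    static int choose_active_workers(int total, int prev_active, int non_daemon_threads) {
      int wanted = std::max(prev_active, 2 * non_daemon_threads);
      return std::max(1, std::min(total, wanted));
    }

    static int par_threads = 0;  // analogue of the heap's _n_par_threads

    static void run_parallel_phase(WorkerGang* gang, int non_daemon_threads) {
      gang->active_workers =
          choose_active_workers(gang->total_workers, gang->active_workers,
                                non_daemon_threads);
      par_threads = gang->active_workers;   // set only for the parallel phase...
      std::printf("running phase with %d of %d workers\n",
                  par_threads, gang->total_workers);
      // ... worker gang runs the task here ...
      par_threads = 0;                      // ...and always reset afterwards, so a
                                            // later serial phase never sees a stale
                                            // parallel thread count.
    }

    int main() {
      WorkerGang gang = {8, 1};
      run_parallel_phase(&gang, 3);   // few mutator threads -> fewer active workers
      run_parallel_phase(&gang, 40);  // many mutator threads -> capped at the gang size
      return 0;
    }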
@@ -2475 +2508 @@
 }

 void
 G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl,
                                                  int worker,
+                                                 int no_of_par_workers,
                                                  jint claim_value) {
   const size_t regions = n_regions();
-  const size_t worker_num = (G1CollectedHeap::use_parallel_gc_threads() ? ParallelGCThreads : 1);
+  const size_t max_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                              no_of_par_workers :
+                              1);
+  assert(UseDynamicNumberOfGCThreads ||
+         no_of_par_workers == workers()->total_workers(),
+         "Non dynamic should use fixed number of workers");
   // try to spread out the starting points of the workers
-  const size_t start_index = regions / worker_num * (size_t) worker;
+  const size_t start_index = regions / max_workers * (size_t) worker;

   // each worker will actually look at all regions
   for (size_t count = 0; count < regions; ++count) {
     const size_t index = (start_index + count) % regions;
     assert(0 <= index && index < regions, "sanity");
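heap_region_par_iterate_chunked() now takes the number of parallel workers so the starting offsets can be spread across the currently active workers rather than a fixed ParallelGCThreads. A minimal standalone sketch of that iteration shape, with hypothetical types and a plain boolean claim in place of G1's claim values:

    #include <atomic>
    #include <cstdio>
    #include <thread>
    #include <vector>

    int main() {
      const size_t regions = 10;
      const size_t workers = 3;
      std::vector<std::atomic<bool>> claimed(regions);
      for (auto& c : claimed) c = false;

      auto work = [&](size_t worker) {
        // Spread out the starting points, as in heap_region_par_iterate_chunked().
        const size_t start = regions / workers * worker;
        // Each worker walks all regions, but a per-region claim ensures each
        // region is processed exactly once across the gang.
        for (size_t count = 0; count < regions; ++count) {
          const size_t index = (start + count) % regions;
          bool expected = false;
          if (claimed[index].compare_exchange_strong(expected, true)) {
            std::printf("worker %zu processes region %zu\n", worker, index);
          }
        }
      };

      std::vector<std::thread> gang;
      for (size_t w = 0; w < workers; ++w) gang.emplace_back(work, w);
      for (auto& t : gang) t.join();
      return 0;
    }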
@@ -2918 +2957 @@

   void work(int worker_i) {
     HandleMark hm;
     VerifyRegionClosure blk(_allow_dirty, true, _vo);
     _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
+        _g1h->workers()->active_workers(),
         HeapRegion::ParVerifyClaimValue);
     if (blk.failures()) {
       _failures = true;
     }
   }
@@ -2935 +2975 @@
                                bool silent,
                                VerifyOption vo) {
   if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) {
     if (!silent) { gclog_or_tty->print("Roots (excluding permgen) "); }
     VerifyRootsClosure rootsCl(vo);
+
+    assert(Thread::current()->is_VM_thread(),
+           "Expected to be executed serially by the VM thread at this point");
+
     CodeBlobToOopClosure blobsCl(&rootsCl, /*do_marking=*/ false);

     // We apply the relevant closures to all the oops in the
     // system dictionary, the string table and the code cache.
     const int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
@@ -2979 +3023 @@
   if (GCParallelVerificationEnabled && ParallelGCThreads > 1) {
     assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue),
            "sanity check");

     G1ParVerifyTask task(this, allow_dirty, vo);
-    int n_workers = workers()->total_workers();
+    assert(UseDynamicNumberOfGCThreads ||
+           workers()->active_workers() == workers()->total_workers(),
+           "If not dynamic should be using all the workers");
+    int n_workers = workers()->active_workers();
     set_par_threads(n_workers);
     workers()->run_task(&task);
     set_par_threads(0);
     if (task.failures()) {
       failures = true;
     }

+    // Checks that the expected amount of parallel work was done.
+    // The implication is that n_workers is > 0.
     assert(check_heap_region_claim_values(HeapRegion::ParVerifyClaimValue),
            "sanity check");

     reset_heap_region_claim_values();
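The verification code relies on region claim values to prove that the parallel phase covered every region: regions start at InitialClaimValue, each worker claims regions by flipping them to ParVerifyClaimValue, and the asserts before and after check the claim state, which is then reset for the next user. A small sketch of that bookkeeping, using invented names (Region, claim_region, check_claim_values) rather than the HeapRegion API:

    #include <atomic>
    #include <cassert>
    #include <vector>

    enum ClaimValue { InitialClaimValue = 0, ParVerifyClaimValue = 1 };

    struct Region { std::atomic<int> claim{InitialClaimValue}; };

    // A worker claims a region by moving it from the initial value to the
    // phase-specific value; only the claiming worker processes it.
    bool claim_region(Region& r, int phase_value) {
      int expected = InitialClaimValue;
      return r.claim.compare_exchange_strong(expected, phase_value);
    }

    bool check_claim_values(const std::vector<Region>& rs, int expected) {
      for (const auto& r : rs) if (r.claim.load() != expected) return false;
      return true;
    }

    void reset_claim_values(std::vector<Region>& rs) {
      for (auto& r : rs) r.claim.store(InitialClaimValue);
    }

    int main() {
      std::vector<Region> regions(16);
      assert(check_claim_values(regions, InitialClaimValue));          // "sanity check"
      for (auto& r : regions) claim_region(r, ParVerifyClaimValue);    // parallel phase
      // If any worker had skipped regions, this check would fail.
      assert(check_claim_values(regions, ParVerifyClaimValue));
      reset_claim_values(regions);
      return 0;
    }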
@@ -3400 +3449 @@
   }

   assert(check_young_list_well_formed(),
          "young list should be well formed");

+  // Don't dynamically change the number of GC threads this early. A value of
+  // 0 is used to indicate serial work. When parallel work is done,
+  // it will be set.
+
   { // Call to jvmpi::post_class_unload_events must occur outside of active GC
     IsGCActiveMark x;

     gc_prologue(false);
     increment_total_collections(false /* full gc */);
@@ -3613 +3666 @@
   }

   double end_time_sec = os::elapsedTime();
   double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
   g1_policy()->record_pause_time_ms(pause_time_ms);
-  g1_policy()->record_collection_pause_end();
+  int active_gc_threads = workers()->active_workers();
+  g1_policy()->record_collection_pause_end(active_gc_threads);

   MemoryService::track_memory_usage();

   // In prepare_for_verify() below we'll need to scan the deferred
   // update buffers to bring the RSets up-to-date if
@@ -4560 +4614 @@
     return (_g1h->capacity() + G1BlockOffsetSharedArray::N_bytes - 1)
             / G1BlockOffsetSharedArray::N_bytes;
   }

 public:
-  G1ParTask(G1CollectedHeap* g1h, int workers, RefToScanQueueSet *task_queues)
+  G1ParTask(G1CollectedHeap* g1h,
+            RefToScanQueueSet *task_queues)
     : AbstractGangTask("G1 collection"),
       _g1h(g1h),
       _queues(task_queues),
-      _terminator(workers, _queues),
-      _stats_lock(Mutex::leaf, "parallel G1 stats lock", true),
-      _n_workers(workers)
+      _terminator(0, _queues),
+      _stats_lock(Mutex::leaf, "parallel G1 stats lock", true)
   {}

   RefToScanQueueSet* queues() { return _queues; }

   RefToScanQueue *work_queue(int i) {
     return queues()->queue(i);
+  }
+
+  ParallelTaskTerminator* terminator() { return &_terminator; }
+
+  virtual void set_for_termination(int active_workers) {
+    // This task calls set_n_termination() in par_non_clean_card_iterate_work()
+    // in the young space (_par_seq_tasks) in the G1 heap
+    // for SequentialSubTasksDone.
+    // This task also uses SubTasksDone in SharedHeap and G1CollectedHeap
+    // both of which need setting by set_n_termination().
+    _g1h->SharedHeap::set_n_termination(active_workers);
+    _g1h->set_n_termination(active_workers);
+    terminator()->reset_for_reuse(active_workers);
+    _n_workers = active_workers;
   }

   void work(int i) {
     if (i >= _n_workers) return; // no work needed this round
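G1ParTask no longer receives a worker count in its constructor; the gang supplies it through set_for_termination() right before the task runs, and the task resizes its terminator and records the count so that work(i) ignores surplus worker ids. A compact sketch of that protocol, with stand-in classes (Terminator, GangTask, run_task) that are not the HotSpot WorkGang/AbstractGangTask API:

    #include <cstdio>

    class Terminator {
      int _n;
    public:
      Terminator() : _n(0) {}
      void reset_for_reuse(int active) { _n = active; }   // resize for this run
      int n() const { return _n; }
    };

    class GangTask {
      Terminator _terminator;
      int _n_workers;
    public:
      GangTask() : _n_workers(0) {}                       // count not known yet
      void set_for_termination(int active_workers) {      // called by the gang
        _terminator.reset_for_reuse(active_workers);      // just before dispatch
        _n_workers = active_workers;
      }
      void work(int i) {
        if (i >= _n_workers) return;                      // no work for this id
        std::printf("worker %d of %d running\n", i, _n_workers);
      }
    };

    // A gang with a fixed total but a per-run active count.
    void run_task(GangTask& task, int total_workers, int active_workers) {
      task.set_for_termination(active_workers);
      for (int i = 0; i < total_workers; ++i) task.work(i);  // surplus ids return at once
    }

    int main() {
      GangTask task;
      run_task(task, 8, 3);   // only 3 of the 8 worker ids do any work this round
      return 0;
    }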
@@ -4859 +4927 @@

 class G1STWRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
 private:
   G1CollectedHeap* _g1h;
   RefToScanQueueSet* _queues;
-  WorkGang* _workers;
+  FlexibleWorkGang* _workers;
   int _active_workers;

 public:
   G1STWRefProcTaskExecutor(G1CollectedHeap* g1h,
-                           WorkGang* workers,
+                           FlexibleWorkGang* workers,
                            RefToScanQueueSet *task_queues,
                            int n_workers) :
     _g1h(g1h),
     _queues(task_queues),
     _workers(workers),
@@ -5120 +5188 @@
   // We also need to do this copying before we process the reference
   // objects discovered by the STW ref processor in case one of these
   // referents points to another object which is also referenced by an
   // object discovered by the STW ref processor.

-  int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
-                   workers()->total_workers() : 1);
+  int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                        workers()->active_workers() : 1);

-  set_par_threads(n_workers);
-  G1ParPreserveCMReferentsTask keep_cm_referents(this, n_workers, _task_queues);
+  assert(active_workers == workers()->active_workers(),
+         "Need to reset active_workers");
+  set_par_threads(active_workers);
+  G1ParPreserveCMReferentsTask keep_cm_referents(this, active_workers, _task_queues);

   if (G1CollectedHeap::use_parallel_gc_threads()) {
     workers()->run_task(&keep_cm_referents);
   } else {
     keep_cm_referents.work(0);
@@ -5190 +5260 @@
                                       &keep_alive,
                                       &drain_queue,
                                       NULL);
   } else {
     // Parallel reference processing
-    int active_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
     assert(rp->num_q() == active_workers, "sanity");
     assert(active_workers <= rp->max_num_q(), "sanity");

     G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, active_workers);
     rp->process_discovered_references(&is_alive, &keep_alive, &drain_queue, &par_task_executor);
@@ -5223 +5292 @@
     // Serial reference processing...
     rp->enqueue_discovered_references();
   } else {
     // Parallel reference enqueuing

-    int active_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
+    int active_workers = (ParallelGCThreads > 0 ? workers()->active_workers() : 1);
+    assert(active_workers == workers()->active_workers(),
+           "Need to reset active_workers");
     assert(rp->num_q() == active_workers, "sanity");
     assert(active_workers <= rp->max_num_q(), "sanity");

     G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, active_workers);
     rp->enqueue_discovered_references(&par_task_executor);
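The asserts in the two reference-processing hunks above compare the executor's active worker count with the reference processor's queue count, because in the parallel case each worker drains one discovered-reference queue, and the count can never exceed the number of queues allocated up front. A toy sketch of that invariant, using invented miniature types (RefProcessorSketch, RefProcExecutorSketch) rather than the real ReferenceProcessor:

    #include <cassert>
    #include <cstdio>
    #include <vector>

    struct RefProcessorSketch {
      int _max_num_q;                 // queues allocated at startup
      int _num_q;                     // queues actually used this cycle
      std::vector<std::vector<int>> _discovered;
      explicit RefProcessorSketch(int max_q)
          : _max_num_q(max_q), _num_q(max_q), _discovered(max_q) {}
      int num_q() const { return _num_q; }
      int max_num_q() const { return _max_num_q; }
      void set_active_degree(int n) { _num_q = n; }   // shrink for this cycle
    };

    struct RefProcExecutorSketch {
      int _active_workers;
      void execute(RefProcessorSketch& rp) {
        assert(rp.num_q() == _active_workers);        // the "sanity" asserts
        assert(_active_workers <= rp.max_num_q());
        for (int i = 0; i < _active_workers; ++i) {   // worker i drains queue i
          std::printf("worker %d drains %zu refs\n", i, rp._discovered[i].size());
        }
      }
    };

    int main() {
      RefProcessorSketch rp(8);       // built for up to 8 workers
      int active_workers = 5;         // this pause only uses 5
      rp.set_active_degree(active_workers);
      RefProcExecutorSketch exec{active_workers};
      exec.execute(rp);
      return 0;
    }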
@@ -5250 +5321 @@

   g1_rem_set()->prepare_for_oops_into_collection_set_do();
   concurrent_g1_refine()->set_use_cache(false);
   concurrent_g1_refine()->clear_hot_cache_claimed_index();

-  int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
-  set_par_threads(n_workers);
-  G1ParTask g1_par_task(this, n_workers, _task_queues);
+  int n_workers;
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    n_workers =
+        AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
+                                                workers()->active_workers(),
+                                                Threads::number_of_non_daemon_threads());
+    assert(UseDynamicNumberOfGCThreads ||
+           n_workers == workers()->total_workers(),
+           "If not dynamic should be using all the workers");
+    set_par_threads(n_workers);
+  } else {
+    assert(n_par_threads() == 0,
+           "Should be the original non-parallel value");
+    n_workers = 1;
+  }
+  workers()->set_active_workers(n_workers);
+
+  G1ParTask g1_par_task(this, _task_queues);

   init_for_evac_failure(NULL);

   rem_set()->prepare_for_younger_refs_iterate(true);

@@ -5265 +5351 @@

   if (G1CollectedHeap::use_parallel_gc_threads()) {
     // The individual threads will set their evac-failure closures.
     StrongRootsScope srs(this);
     if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
+    // These tasks use SharedHeap::_process_strong_tasks
+    assert(UseDynamicNumberOfGCThreads ||
+           workers()->active_workers() == workers()->total_workers(),
+           "If not dynamic should be using all the workers");
     workers()->run_task(&g1_par_task);
   } else {
     StrongRootsScope srs(this);
     g1_par_task.work(0);
   }

   double par_time = (os::elapsedTime() - start_par) * 1000.0;
   g1_policy()->record_par_time(par_time);
+
   set_par_threads(0);

   // Process any discovered reference objects - we have
   // to do this _before_ we retire the GC alloc regions
   // as we may have to copy some 'reachable' referent
@@ -5901 +5992 @@
 }

 HeapRegion* MutatorAllocRegion::allocate_new_region(size_t word_size,
                                                     bool force) {
   return _g1h->new_mutator_alloc_region(word_size, force);
+}
+
+void G1CollectedHeap::set_par_threads() {
+  // Don't change the number of workers. Use the value previously set
+  // in the workgroup.
+  int n_workers = workers()->active_workers();
+  assert(UseDynamicNumberOfGCThreads ||
+         n_workers == workers()->total_workers(),
+         "Otherwise should be using the total number of workers");
+  if (n_workers == 0) {
+    assert(false, "Should have been set in prior evacuation pause.");
+    n_workers = ParallelGCThreads;
+    workers()->set_active_workers(n_workers);
+  }
+  set_par_threads(n_workers);
 }

 void MutatorAllocRegion::retire_region(HeapRegion* alloc_region,
                                        size_t allocated_bytes) {
   _g1h->retire_mutator_alloc_region(alloc_region, allocated_bytes);