comparison src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp @ 4095:bca17e38de00

6593758: RFE: Enhance GC ergonomics to dynamically choose ParallelGCThreads Summary: Select number of GC threads dynamically based on heap usage and number of Java threads Reviewed-by: johnc, ysr, jcoomes
author jmasa
date Tue, 09 Aug 2011 10:16:01 -0700
parents 4dfb2df418f2
children 7913e93dca52
comparison
equal deleted inserted replaced
4094:3a298e04d914 4095:bca17e38de00
2043 2043
2044 { 2044 {
2045 ResourceMark rm; 2045 ResourceMark rm;
2046 HandleMark hm; 2046 HandleMark hm;
2047 2047
2048 // Set the number of GC threads to be used in this collection
2049 gc_task_manager()->set_active_gang();
2050 gc_task_manager()->task_idle_workers();
2051 heap->set_par_threads(gc_task_manager()->active_workers());
2052
2048 const bool is_system_gc = gc_cause == GCCause::_java_lang_system_gc; 2053 const bool is_system_gc = gc_cause == GCCause::_java_lang_system_gc;
2049 2054
2050 // This is useful for debugging but don't change the output 2055 // This is useful for debugging but don't change the output
2051 // the customer sees. 2056 // the customer sees.
2052 const char* gc_cause_str = "Full GC"; 2057 const char* gc_cause_str = "Full GC";
2195 } 2200 }
2196 2201
2197 // Track memory usage and detect low memory 2202 // Track memory usage and detect low memory
2198 MemoryService::track_memory_usage(); 2203 MemoryService::track_memory_usage();
2199 heap->update_counters(); 2204 heap->update_counters();
2205 gc_task_manager()->release_idle_workers();
2200 } 2206 }
2201 2207
2202 #ifdef ASSERT 2208 #ifdef ASSERT
2203 for (size_t i = 0; i < ParallelGCThreads + 1; ++i) { 2209 for (size_t i = 0; i < ParallelGCThreads + 1; ++i) {
2204 ParCompactionManager* const cm = 2210 ParCompactionManager* const cm =
2205 ParCompactionManager::manager_array(int(i)); 2211 ParCompactionManager::manager_array(int(i));
2206 assert(cm->marking_stack()->is_empty(), "should be empty"); 2212 assert(cm->marking_stack()->is_empty(), "should be empty");
2207 assert(cm->region_stack()->is_empty(), "should be empty"); 2213 assert(ParCompactionManager::region_list(int(i))->is_empty(), "should be empty");
2208 assert(cm->revisit_klass_stack()->is_empty(), "should be empty"); 2214 assert(cm->revisit_klass_stack()->is_empty(), "should be empty");
2209 } 2215 }
2210 #endif // ASSERT 2216 #endif // ASSERT
2211 2217
2212 if (VerifyAfterGC && heap->total_collections() >= VerifyGCStartAt) { 2218 if (VerifyAfterGC && heap->total_collections() >= VerifyGCStartAt) {
2349 EventMark m("1 mark object"); 2355 EventMark m("1 mark object");
2350 TraceTime tm("marking phase", print_phases(), true, gclog_or_tty); 2356 TraceTime tm("marking phase", print_phases(), true, gclog_or_tty);
2351 2357
2352 ParallelScavengeHeap* heap = gc_heap(); 2358 ParallelScavengeHeap* heap = gc_heap();
2353 uint parallel_gc_threads = heap->gc_task_manager()->workers(); 2359 uint parallel_gc_threads = heap->gc_task_manager()->workers();
2360 uint active_gc_threads = heap->gc_task_manager()->active_workers();
2354 TaskQueueSetSuper* qset = ParCompactionManager::region_array(); 2361 TaskQueueSetSuper* qset = ParCompactionManager::region_array();
2355 ParallelTaskTerminator terminator(parallel_gc_threads, qset); 2362 ParallelTaskTerminator terminator(active_gc_threads, qset);
2356 2363
2357 PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm); 2364 PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
2358 PSParallelCompact::FollowStackClosure follow_stack_closure(cm); 2365 PSParallelCompact::FollowStackClosure follow_stack_closure(cm);
2359 2366
2360 { 2367 {
2372 q->enqueue(new MarkFromRootsTask(MarkFromRootsTask::management)); 2379 q->enqueue(new MarkFromRootsTask(MarkFromRootsTask::management));
2373 q->enqueue(new MarkFromRootsTask(MarkFromRootsTask::system_dictionary)); 2380 q->enqueue(new MarkFromRootsTask(MarkFromRootsTask::system_dictionary));
2374 q->enqueue(new MarkFromRootsTask(MarkFromRootsTask::jvmti)); 2381 q->enqueue(new MarkFromRootsTask(MarkFromRootsTask::jvmti));
2375 q->enqueue(new MarkFromRootsTask(MarkFromRootsTask::code_cache)); 2382 q->enqueue(new MarkFromRootsTask(MarkFromRootsTask::code_cache));
2376 2383
2377 if (parallel_gc_threads > 1) { 2384 if (active_gc_threads > 1) {
2378 for (uint j = 0; j < parallel_gc_threads; j++) { 2385 for (uint j = 0; j < active_gc_threads; j++) {
2379 q->enqueue(new StealMarkingTask(&terminator)); 2386 q->enqueue(new StealMarkingTask(&terminator));
2380 } 2387 }
2381 } 2388 }
2382 2389
2383 WaitForBarrierGCTask* fin = WaitForBarrierGCTask::create(); 2390 gc_task_manager()->execute_and_wait(q);
2384 q->enqueue(fin);
2385
2386 gc_task_manager()->add_list(q);
2387
2388 fin->wait_for();
2389
2390 // We have to release the barrier tasks!
2391 WaitForBarrierGCTask::destroy(fin);
2392 } 2391 }
2393 2392
2394 // Process reference objects found during marking 2393 // Process reference objects found during marking
2395 { 2394 {
2396 TraceTime tm_r("reference processing", print_phases(), true, gclog_or_tty); 2395 TraceTime tm_r("reference processing", print_phases(), true, gclog_or_tty);
2481 void PSParallelCompact::enqueue_region_draining_tasks(GCTaskQueue* q, 2480 void PSParallelCompact::enqueue_region_draining_tasks(GCTaskQueue* q,
2482 uint parallel_gc_threads) 2481 uint parallel_gc_threads)
2483 { 2482 {
2484 TraceTime tm("drain task setup", print_phases(), true, gclog_or_tty); 2483 TraceTime tm("drain task setup", print_phases(), true, gclog_or_tty);
2485 2484
2486 const unsigned int task_count = MAX2(parallel_gc_threads, 1U); 2485 // Find the threads that are active
2487 for (unsigned int j = 0; j < task_count; j++) { 2486 unsigned int which = 0;
2487
2488 const uint task_count = MAX2(parallel_gc_threads, 1U);
2489 for (uint j = 0; j < task_count; j++) {
2488 q->enqueue(new DrainStacksCompactionTask(j)); 2490 q->enqueue(new DrainStacksCompactionTask(j));
2489 } 2491 ParCompactionManager::verify_region_list_empty(j);
2492 // Set the region stack variables to "no" region stack values
2493 // so that they will be recognized as needing a region stack
2494 // in the stealing tasks if they do not get one by executing
2495 // a draining task.
2496 ParCompactionManager* cm = ParCompactionManager::manager_array(j);
2497 cm->set_region_stack(NULL);
2498 cm->set_region_stack_index((uint)max_uintx);
2499 }
2500 ParCompactionManager::reset_recycled_stack_index();
2490 2501
2491 // Find all regions that are available (can be filled immediately) and 2502 // Find all regions that are available (can be filled immediately) and
2492 // distribute them to the thread stacks. The iteration is done in reverse 2503 // distribute them to the thread stacks. The iteration is done in reverse
2493 // order (high to low) so the regions will be removed in ascending order. 2504 // order (high to low) so the regions will be removed in ascending order.
2494 2505
2495 const ParallelCompactData& sd = PSParallelCompact::summary_data(); 2506 const ParallelCompactData& sd = PSParallelCompact::summary_data();
2496 2507
2497 size_t fillable_regions = 0; // A count for diagnostic purposes. 2508 size_t fillable_regions = 0; // A count for diagnostic purposes.
2498 unsigned int which = 0; // The worker thread number. 2509 // A region index which corresponds to the tasks created above.
2499 2510 // "which" must be 0 <= which < task_count
2511
2512 which = 0;
2500 for (unsigned int id = to_space_id; id > perm_space_id; --id) { 2513 for (unsigned int id = to_space_id; id > perm_space_id; --id) {
2501 SpaceInfo* const space_info = _space_info + id; 2514 SpaceInfo* const space_info = _space_info + id;
2502 MutableSpace* const space = space_info->space(); 2515 MutableSpace* const space = space_info->space();
2503 HeapWord* const new_top = space_info->new_top(); 2516 HeapWord* const new_top = space_info->new_top();
2504 2517
2507 sd.addr_to_region_idx(sd.region_align_up(new_top)); 2520 sd.addr_to_region_idx(sd.region_align_up(new_top));
2508 assert(end_region > 0, "perm gen cannot be empty"); 2521 assert(end_region > 0, "perm gen cannot be empty");
2509 2522
2510 for (size_t cur = end_region - 1; cur >= beg_region; --cur) { 2523 for (size_t cur = end_region - 1; cur >= beg_region; --cur) {
2511 if (sd.region(cur)->claim_unsafe()) { 2524 if (sd.region(cur)->claim_unsafe()) {
2512 ParCompactionManager* cm = ParCompactionManager::manager_array(which); 2525 ParCompactionManager::region_list_push(which, cur);
2513 cm->push_region(cur);
2514 2526
2515 if (TraceParallelOldGCCompactionPhase && Verbose) { 2527 if (TraceParallelOldGCCompactionPhase && Verbose) {
2516 const size_t count_mod_8 = fillable_regions & 7; 2528 const size_t count_mod_8 = fillable_regions & 7;
2517 if (count_mod_8 == 0) gclog_or_tty->print("fillable: "); 2529 if (count_mod_8 == 0) gclog_or_tty->print("fillable: ");
2518 gclog_or_tty->print(" " SIZE_FORMAT_W(7), cur); 2530 gclog_or_tty->print(" " SIZE_FORMAT_W(7), cur);
2519 if (count_mod_8 == 7) gclog_or_tty->cr(); 2531 if (count_mod_8 == 7) gclog_or_tty->cr();
2520 } 2532 }
2521 2533
2522 NOT_PRODUCT(++fillable_regions;) 2534 NOT_PRODUCT(++fillable_regions;)
2523 2535
2524 // Assign regions to threads in round-robin fashion. 2536 // Assign regions to tasks in round-robin fashion.
2525 if (++which == task_count) { 2537 if (++which == task_count) {
2538 assert(which <= parallel_gc_threads,
2539 "Inconsistent number of workers");
2526 which = 0; 2540 which = 0;
2527 } 2541 }
2528 } 2542 }
2529 } 2543 }
2530 } 2544 }
2640 ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); 2654 ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
2641 assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity"); 2655 assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
2642 PSOldGen* old_gen = heap->old_gen(); 2656 PSOldGen* old_gen = heap->old_gen();
2643 old_gen->start_array()->reset(); 2657 old_gen->start_array()->reset();
2644 uint parallel_gc_threads = heap->gc_task_manager()->workers(); 2658 uint parallel_gc_threads = heap->gc_task_manager()->workers();
2659 uint active_gc_threads = heap->gc_task_manager()->active_workers();
2645 TaskQueueSetSuper* qset = ParCompactionManager::region_array(); 2660 TaskQueueSetSuper* qset = ParCompactionManager::region_array();
2646 ParallelTaskTerminator terminator(parallel_gc_threads, qset); 2661 ParallelTaskTerminator terminator(active_gc_threads, qset);
2647 2662
2648 GCTaskQueue* q = GCTaskQueue::create(); 2663 GCTaskQueue* q = GCTaskQueue::create();
2649 enqueue_region_draining_tasks(q, parallel_gc_threads); 2664 enqueue_region_draining_tasks(q, active_gc_threads);
2650 enqueue_dense_prefix_tasks(q, parallel_gc_threads); 2665 enqueue_dense_prefix_tasks(q, active_gc_threads);
2651 enqueue_region_stealing_tasks(q, &terminator, parallel_gc_threads); 2666 enqueue_region_stealing_tasks(q, &terminator, active_gc_threads);
2652 2667
2653 { 2668 {
2654 TraceTime tm_pc("par compact", print_phases(), true, gclog_or_tty); 2669 TraceTime tm_pc("par compact", print_phases(), true, gclog_or_tty);
2655 2670
2656 WaitForBarrierGCTask* fin = WaitForBarrierGCTask::create(); 2671 gc_task_manager()->execute_and_wait(q);
2657 q->enqueue(fin);
2658
2659 gc_task_manager()->add_list(q);
2660
2661 fin->wait_for();
2662
2663 // We have to release the barrier tasks!
2664 WaitForBarrierGCTask::destroy(fin);
2665 2672
2666 #ifdef ASSERT 2673 #ifdef ASSERT
2667 // Verify that all regions have been processed before the deferred updates. 2674 // Verify that all regions have been processed before the deferred updates.
2668 // Note that perm_space_id is skipped; this type of verification is not 2675 // Note that perm_space_id is skipped; this type of verification is not
2669 // valid until the perm gen is compacted by regions. 2676 // valid until the perm gen is compacted by regions.
2727 2734
2728 void 2735 void
2729 PSParallelCompact::follow_weak_klass_links() { 2736 PSParallelCompact::follow_weak_klass_links() {
2730 // All klasses on the revisit stack are marked at this point. 2737 // All klasses on the revisit stack are marked at this point.
2731 // Update and follow all subklass, sibling and implementor links. 2738 // Update and follow all subklass, sibling and implementor links.
2739 // Check all the stacks here even if not all the workers are active.
2740 // There is no accounting which indicates which stacks might have
2741 // contents to be followed.
2732 if (PrintRevisitStats) { 2742 if (PrintRevisitStats) {
2733 gclog_or_tty->print_cr("#classes in system dictionary = %d", 2743 gclog_or_tty->print_cr("#classes in system dictionary = %d",
2734 SystemDictionary::number_of_classes()); 2744 SystemDictionary::number_of_classes());
2735 } 2745 }
2736 for (uint i = 0; i < ParallelGCThreads + 1; i++) { 2746 for (uint i = 0; i < ParallelGCThreads + 1; i++) {