comparison src/share/vm/gc_implementation/g1/concurrentMark.cpp @ 4095:bca17e38de00

6593758: RFE: Enhance GC ergonomics to dynamically choose ParallelGCThreads
Summary: Select number of GC threads dynamically based on heap usage and number of Java threads
Reviewed-by: johnc, ysr, jcoomes
author jmasa
date Tue, 09 Aug 2011 10:16:01 -0700
parents 6071e0581859
children dc467e8b2c5e
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	4094:3a298e04d914
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	4095:bca17e38de00
@@ -456,16 +456,21 @@
 
 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
 
+size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
+  return MAX2((n_par_threads + 2) / 4, (size_t)1);
+}
+
 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                                int max_regions) :
   _markBitMap1(rs, MinObjAlignment - 1),
   _markBitMap2(rs, MinObjAlignment - 1),
 
   _parallel_marking_threads(0),
+  _max_parallel_marking_threads(0),
   _sleep_factor(0.0),
   _marking_task_overhead(1.0),
   _cleanup_sleep_factor(0.0),
   _cleanup_task_overhead(1.0),
   _cleanup_list("Cleanup List"),
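
For context: the new scale_parallel_threads() helper reserves roughly one concurrent marking thread for every four parallel GC threads, with a floor of one. A minimal standalone sketch of the arithmetic (illustrative demo code, with HotSpot's MAX2 macro inlined):

    #include <cstddef>
    #include <cstdio>

    // Mirrors scale_parallel_threads(): about one marking thread per
    // four ParallelGCThreads, never fewer than one.
    static size_t scale_parallel_threads(size_t n_par_threads) {
      size_t scaled = (n_par_threads + 2) / 4;
      return scaled > 1 ? scaled : 1;   // MAX2(scaled, (size_t)1)
    }

    int main() {
      // Prints 1,1,1,1,1,2,2,2,2,3 for n = 1..10.
      for (size_t n = 1; n <= 10; n++) {
        std::printf("ParallelGCThreads=%2zu -> %zu marking thread(s)\n",
                    n, scale_parallel_threads(n));
      }
      return 0;
    }

The same value also seeds the new _max_parallel_marking_threads field, which caps later dynamic choices and sizes the FlexibleWorkGang below.
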
552 "than ParallelGCThreads."); 557 "than ParallelGCThreads.");
553 } 558 }
554 if (ParallelGCThreads == 0) { 559 if (ParallelGCThreads == 0) {
555 // if we are not running with any parallel GC threads we will not 560 // if we are not running with any parallel GC threads we will not
556 // spawn any marking threads either 561 // spawn any marking threads either
557 _parallel_marking_threads = 0; 562 _parallel_marking_threads = 0;
558 _sleep_factor = 0.0; 563 _max_parallel_marking_threads = 0;
559 _marking_task_overhead = 1.0; 564 _sleep_factor = 0.0;
565 _marking_task_overhead = 1.0;
560 } else { 566 } else {
561 if (ConcGCThreads > 0) { 567 if (ConcGCThreads > 0) {
562 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent 568 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
563 // if both are set 569 // if both are set
564 570
565 _parallel_marking_threads = ConcGCThreads; 571 _parallel_marking_threads = ConcGCThreads;
572 _max_parallel_marking_threads = _parallel_marking_threads;
566 _sleep_factor = 0.0; 573 _sleep_factor = 0.0;
567 _marking_task_overhead = 1.0; 574 _marking_task_overhead = 1.0;
568 } else if (G1MarkingOverheadPercent > 0) { 575 } else if (G1MarkingOverheadPercent > 0) {
569 // we will calculate the number of parallel marking threads 576 // we will calculate the number of parallel marking threads
570 // based on a target overhead with respect to the soft real-time 577 // based on a target overhead with respect to the soft real-time
@@ -581,14 +588,16 @@
                            (double) os::processor_count();
       double sleep_factor =
                          (1.0 - marking_task_overhead) / marking_task_overhead;
 
       _parallel_marking_threads = (size_t) marking_thread_num;
+      _max_parallel_marking_threads = _parallel_marking_threads;
       _sleep_factor = sleep_factor;
       _marking_task_overhead = marking_task_overhead;
     } else {
-      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
+      _parallel_marking_threads = scale_parallel_threads(ParallelGCThreads);
+      _max_parallel_marking_threads = _parallel_marking_threads;
       _sleep_factor = 0.0;
       _marking_task_overhead = 1.0;
     }
 
     if (parallel_marking_threads() > 1) {
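
To make the visible formula concrete: G1MarkingOverheadPercent sets the fraction of CPU time that concurrent marking may consume, and sleep_factor converts that target into sleep time per unit of marking work. A hedged worked example (the lines that compute marking_task_overhead itself are elided from this hunk, so the input value below is assumed):

    #include <cstdio>

    int main() {
      // Assumed input: marking may use 25% of a CPU's time.
      double marking_task_overhead = 0.25;
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;
      // Prints 3.0: sleep three time units for every unit of marking
      // work, which keeps the measured overhead near the 25% target.
      std::printf("sleep_factor = %.1f\n", sleep_factor);
      return 0;
    }
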
@@ -607,11 +616,11 @@
     gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
 #endif
 
   guarantee(parallel_marking_threads() > 0, "peace of mind");
   _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
-       (int) _parallel_marking_threads, false, true);
+       (int) _max_parallel_marking_threads, false, true);
   if (_parallel_workers == NULL) {
     vm_exit_during_initialization("Failed necessary allocation.");
   } else {
     _parallel_workers->initialize_workers();
   }
@@ -1104,23 +1113,61 @@
     AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 
   ~CMConcurrentMarkingTask() { }
 };
 
+// Calculates the number of active workers for a concurrent
+// phase.
+int ConcurrentMark::calc_parallel_marking_threads() {
+
+  size_t n_conc_workers;
+  if (!G1CollectedHeap::use_parallel_gc_threads()) {
+    n_conc_workers = 1;
+  } else {
+    if (!UseDynamicNumberOfGCThreads ||
+        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
+         !ForceDynamicNumberOfGCThreads)) {
+      n_conc_workers = max_parallel_marking_threads();
+    } else {
+      n_conc_workers =
+        AdaptiveSizePolicy::calc_default_active_workers(
+                                     max_parallel_marking_threads(),
+                                     1, /* Minimum workers */
+                                     parallel_marking_threads(),
+                                     Threads::number_of_non_daemon_threads());
+      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
+      // that scaling has already gone into "_max_parallel_marking_threads".
+    }
+  }
+  assert(n_conc_workers > 0, "Always need at least 1");
+  return (int) MAX2(n_conc_workers, (size_t) 1);
+}
+
 void ConcurrentMark::markFromRoots() {
   // we might be tempted to assert that:
   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
   //        "inconsistent argument?");
   // However that wouldn't be right, because it's possible that
   // a safepoint is indeed in progress as a younger generation
   // stop-the-world GC happens even as we mark in this generation.
 
   _restart_for_overflow = false;
 
-  size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
+  // Parallel task terminator is set in "set_phase()".
   force_overflow_conc()->init();
-  set_phase(active_workers, true /* concurrent */);
+
+  // _g1h has _n_par_threads
+
+  _parallel_marking_threads = calc_parallel_marking_threads();
+  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
+         "Maximum number of marking threads exceeded");
+  _parallel_workers->set_active_workers((int)_parallel_marking_threads);
+  // Don't set _n_par_threads because it affects MT in process_strong_roots()
+  // and the decisions on that MT processing are made elsewhere.
+
+  assert(_parallel_workers->active_workers() > 0, "Should have been set");
+  set_phase(_parallel_workers->active_workers(), true /* concurrent */);
 
   CMConcurrentMarkingTask markingTask(this, cmThread());
   if (parallel_marking_threads() > 0) {
     _parallel_workers->run_task(&markingTask);
   } else {
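
In the dynamic case the worker count comes from AdaptiveSizePolicy::calc_default_active_workers(), capped by the scaled maximum fixed in the constructor. A hedged sketch of the shape of that choice (illustrative only: the real policy also weighs heap size and previous choices, and the two-workers-per-Java-thread scaling here is an assumption, not the HotSpot formula):

    #include <cstddef>

    // Hypothetical stand-in for the adaptive choice: scale with the
    // number of application (non-daemon) threads, clamp to [min, max].
    static size_t sketch_default_active_workers(size_t max_workers,
                                                size_t min_workers,
                                                size_t non_daemon_threads) {
      size_t wanted = 2 * non_daemon_threads;      // assumed scaling
      if (wanted > max_workers) wanted = max_workers;
      if (wanted < min_workers) wanted = min_workers;
      return wanted;
    }

Because max_parallel_marking_threads() already went through scale_parallel_threads(), the in-code comment warns against applying that scaling a second time.
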
@@ -1179,10 +1226,11 @@
   // threads to have SATB queues with active set to true.
   satb_mq_set.set_active_all_threads(false, /* new active value */
                                      true /* expected_active */);
 
   if (VerifyDuringGC) {
+
     HandleMark hm;  // handle scope
     gclog_or_tty->print(" VerifyDuringGC:(after)");
     Universe::heap()->prepare_for_verify();
     Universe::verify(/* allow dirty */ true,
                      /* silent */ false,
@@ -1461,16 +1509,24 @@
   BitMap* _card_bm;
 public:
   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                       BitMap* region_bm, BitMap* card_bm)
     : AbstractGangTask("G1 final counting"), _g1h(g1h),
-      _bm(bm), _region_bm(region_bm), _card_bm(card_bm) {
-    if (ParallelGCThreads > 0) {
-      _n_workers = _g1h->workers()->total_workers();
+      _bm(bm), _region_bm(region_bm), _card_bm(card_bm),
+      _n_workers(0)
+  {
+    // Use the value already set as the number of active threads
+    // in the call to run_task(). Needed for the allocation of
+    // _live_bytes and _used_bytes.
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      assert( _g1h->workers()->active_workers() > 0,
+        "Should have been previously set");
+      _n_workers = _g1h->workers()->active_workers();
     } else {
       _n_workers = 1;
     }
+
     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
   }
 
   ~G1ParFinalCountTask() {
@@ -1483,10 +1539,11 @@
                                               _bm, _g1h->concurrent_mark(),
                                               _region_bm, _card_bm);
     calccl.no_yield();
     if (G1CollectedHeap::use_parallel_gc_threads()) {
       _g1h->heap_region_par_iterate_chunked(&calccl, i,
+                                            (int) _n_workers,
                                             HeapRegion::FinalCountClaimValue);
     } else {
       _g1h->heap_region_iterate(&calccl);
     }
     assert(calccl.complete(), "Shouldn't have yielded!");
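
heap_region_par_iterate_chunked() now also receives the worker count, which lets it size the chunks that each thread claims. A hedged sketch of the general chunked-claim pattern (names and the chunk heuristic are illustrative, not the HotSpot implementation):

    #include <atomic>
    #include <cstddef>

    // Workers share an atomic cursor; n_workers (assumed > 0) only
    // tunes chunk granularity, the atomic claim provides correctness.
    void par_iterate_chunked(std::atomic<size_t>& next_index,
                             size_t n_items, size_t n_workers,
                             void (*process)(size_t)) {
      size_t chunk = (n_items + 8 * n_workers - 1) / (8 * n_workers);
      if (chunk == 0) chunk = 1;                   // at least one item
      for (;;) {
        size_t start = next_index.fetch_add(chunk);
        if (start >= n_items) break;               // all chunks claimed
        size_t end = start + chunk < n_items ? start + chunk : n_items;
        for (size_t i = start; i < end; i++) {
          process(i);                              // e.g. visit region i
        }
      }
    }

Since only the chunk size depends on the count, passing the active rather than the total worker count is safe here.
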
@@ -1598,10 +1655,11 @@
                                            &old_proxy_set,
                                            &humongous_proxy_set,
                                            &hrrs_cleanup_task);
     if (G1CollectedHeap::use_parallel_gc_threads()) {
       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
+                                            _g1h->workers()->active_workers(),
                                             HeapRegion::NoteEndClaimValue);
     } else {
       _g1h->heap_region_iterate(&g1_note_end);
     }
     assert(g1_note_end.complete(), "Shouldn't have yielded!");
@@ -1705,21 +1763,25 @@
 
   double start = os::elapsedTime();
 
   HeapRegionRemSet::reset_for_cleanup_tasks();
 
+  g1h->set_par_threads();
+  size_t n_workers = g1h->n_par_threads();
+
   // Do counting once more with the world stopped for good measure.
   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
                                         &_region_bm, &_card_bm);
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     assert(g1h->check_heap_region_claim_values(
                                                HeapRegion::InitialClaimValue),
            "sanity check");
 
-    int n_workers = g1h->workers()->total_workers();
-    g1h->set_par_threads(n_workers);
+    assert(g1h->n_par_threads() == (int) n_workers,
+           "Should not have been reset");
     g1h->workers()->run_task(&g1_par_count_task);
+    // Done with the parallel phase so reset to 0.
     g1h->set_par_threads(0);
 
     assert(g1h->check_heap_region_claim_values(
                                                HeapRegion::FinalCountClaimValue),
            "sanity check");
@@ -1765,12 +1827,11 @@
 
   // Note end of marking in all heap regions.
   double note_end_start = os::elapsedTime();
   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
   if (G1CollectedHeap::use_parallel_gc_threads()) {
-    int n_workers = g1h->workers()->total_workers();
-    g1h->set_par_threads(n_workers);
+    g1h->set_par_threads((int)n_workers);
     g1h->workers()->run_task(&g1_par_note_end_task);
     g1h->set_par_threads(0);
 
     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
            "sanity check");
@@ -1795,12 +1856,11 @@
   // regions.
   if (G1ScrubRemSets) {
     double rs_scrub_start = os::elapsedTime();
     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
     if (G1CollectedHeap::use_parallel_gc_threads()) {
-      int n_workers = g1h->workers()->total_workers();
-      g1h->set_par_threads(n_workers);
+      g1h->set_par_threads((int)n_workers);
       g1h->workers()->run_task(&g1_par_scrub_rs_task);
       g1h->set_par_threads(0);
 
       assert(g1h->check_heap_region_claim_values(
                                              HeapRegion::ScrubRemSetClaimValue),
@@ -1814,11 +1874,11 @@
     _total_rs_scrub_time += this_rs_scrub_time;
   }
 
   // this will also free any regions totally full of garbage objects,
   // and sort the regions.
-  g1h->g1_policy()->record_concurrent_mark_cleanup_end();
+  g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
 
   // Statistics.
   double end = os::elapsedTime();
   _cleanup_times.add((end - start) * 1000.0);
 
@@ -2185,11 +2245,11 @@
   G1CMDrainMarkingStackClosure
     g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
 
   // We use the work gang from the G1CollectedHeap and we utilize all
   // the worker threads.
-  int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
+  int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1;
   active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
 
   G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                             g1h->workers(), active_workers);
 
@@ -2268,11 +2328,13 @@
       task->record_end_time();
     }
   }
 
   CMRemarkTask(ConcurrentMark* cm) :
-    AbstractGangTask("Par Remark"), _cm(cm) { }
+    AbstractGangTask("Par Remark"), _cm(cm) {
+    _cm->terminator()->reset_for_reuse(cm->_g1h->workers()->active_workers());
+  }
 };
 
 void ConcurrentMark::checkpointRootsFinalWork() {
   ResourceMark rm;
   HandleMark hm;
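
The terminator reset matters because parallel task termination completes only after every participating worker offers to terminate; left sized for the gang's total while only the active workers run, remark could wait forever. A hedged, heavily simplified sketch of that invariant (not the HotSpot ParallelTaskTerminator, which spins and can retract offers):

    #include <atomic>

    class SimpleTerminator {
      std::atomic<int> _offered{0};
      int _n_threads;  // must equal the number of *participating* workers
    public:
      explicit SimpleTerminator(int n) : _n_threads(n) {}
      void reset_for_reuse(int n) { _offered = 0; _n_threads = n; }
      // True once all participants have offered termination. If
      // _n_threads exceeded the real participant count, this would
      // never fire; that is the hang the reset above prevents.
      bool offer_termination() {
        return _offered.fetch_add(1) + 1 >= _n_threads;
      }
    };
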
@@ -2280,20 +2342,25 @@
 
   g1h->ensure_parsability(false);
 
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     G1CollectedHeap::StrongRootsScope srs(g1h);
-    // this is remark, so we'll use up all available threads
-    int active_workers = ParallelGCThreads;
+    // this is remark, so we'll use up all active threads
+    int active_workers = g1h->workers()->active_workers();
+    if (active_workers == 0) {
+      assert(active_workers > 0, "Should have been set earlier");
+      active_workers = ParallelGCThreads;
+      g1h->workers()->set_active_workers(active_workers);
+    }
     set_phase(active_workers, false /* concurrent */);
+    // Leave _parallel_marking_threads at its
+    // value originally calculated in the ConcurrentMark
+    // constructor and pass values of the active workers
+    // through the gang in the task.
 
     CMRemarkTask remarkTask(this);
-    // We will start all available threads, even if we decide that the
-    // active_workers will be fewer. The extra ones will just bail out
-    // immediately.
-    int n_workers = g1h->workers()->total_workers();
-    g1h->set_par_threads(n_workers);
+    g1h->set_par_threads(active_workers);
     g1h->workers()->run_task(&remarkTask);
     g1h->set_par_threads(0);
   } else {
     G1CollectedHeap::StrongRootsScope srs(g1h);
     // this is remark, so we'll use up all available threads
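
The "if (active_workers == 0)" block above looks contradictory at first glance: the assert inside it fires in debug builds, while product builds take the fallback to ParallelGCThreads. A hedged sketch of that debug-trap-plus-product-fallback idiom (standard assert stands in for HotSpot's assert macro, which likewise compiles away in product builds):

    #include <cassert>

    int sanitize_active_workers(int active_workers, int parallel_gc_threads) {
      if (active_workers == 0) {
        // Debug builds stop here; product builds take the fallback.
        assert(active_workers > 0 && "Should have been set earlier");
        active_workers = parallel_gc_threads;
      }
      return active_workers;
    }
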