comparison src/share/vm/gc_implementation/g1/concurrentMark.cpp @ 4836:d30fa85f9994

6484965: G1: piggy-back liveness accounting phase on marking Summary: Remove the separate counting phase of concurrent marking by tracking the amount of marked bytes and the cards spanned by marked objects in marking task/worker thread local data structures, which are updated as individual objects are marked. Reviewed-by: brutisso, tonyp
author johnc
date Thu, 12 Jan 2012 00:06:47 -0800
parents 0b3d1ec6eaee
children eff609af17d7
comparison
equal deleted inserted replaced
4835:877914d90c57 4836:d30fa85f9994
482 _cleanup_list("Cleanup List"), 482 _cleanup_list("Cleanup List"),
483 _region_bm(max_regions, false /* in_resource_area*/), 483 _region_bm(max_regions, false /* in_resource_area*/),
484 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >> 484 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
485 CardTableModRefBS::card_shift, 485 CardTableModRefBS::card_shift,
486 false /* in_resource_area*/), 486 false /* in_resource_area*/),
487
487 _prevMarkBitMap(&_markBitMap1), 488 _prevMarkBitMap(&_markBitMap1),
488 _nextMarkBitMap(&_markBitMap2), 489 _nextMarkBitMap(&_markBitMap2),
489 _at_least_one_mark_complete(false), 490 _at_least_one_mark_complete(false),
490 491
491 _markStack(this), 492 _markStack(this),
510 _init_times(), 511 _init_times(),
511 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), 512 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
512 _cleanup_times(), 513 _cleanup_times(),
513 _total_counting_time(0.0), 514 _total_counting_time(0.0),
514 _total_rs_scrub_time(0.0), 515 _total_rs_scrub_time(0.0),
515 _parallel_workers(NULL) { 516
517 _parallel_workers(NULL),
518
519 _count_card_bitmaps(NULL),
520 _count_marked_bytes(NULL) {
516 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel; 521 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
517 if (verbose_level < no_verbose) { 522 if (verbose_level < no_verbose) {
518 verbose_level = no_verbose; 523 verbose_level = no_verbose;
519 } 524 }
520 if (verbose_level > high_verbose) { 525 if (verbose_level > high_verbose) {
543 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 548 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
544 satb_qs.set_buffer_size(G1SATBBufferSize); 549 satb_qs.set_buffer_size(G1SATBBufferSize);
545 550
546 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num); 551 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
547 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num); 552 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
553
554 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num);
555 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);
556
557 BitMap::idx_t card_bm_size = _card_bm.size();
548 558
549 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail 559 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
550 _active_tasks = _max_task_num; 560 _active_tasks = _max_task_num;
551 for (int i = 0; i < (int) _max_task_num; ++i) { 561 for (int i = 0; i < (int) _max_task_num; ++i) {
552 CMTaskQueue* task_queue = new CMTaskQueue(); 562 CMTaskQueue* task_queue = new CMTaskQueue();
553 task_queue->initialize(); 563 task_queue->initialize();
554 _task_queues->register_queue(i, task_queue); 564 _task_queues->register_queue(i, task_queue);
555 565
556 _tasks[i] = new CMTask(i, this, task_queue, _task_queues); 566 _count_card_bitmaps[i] = BitMap(card_bm_size, false);
567 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions);
568
569 _tasks[i] = new CMTask(i, this,
570 _count_marked_bytes[i],
571 &_count_card_bitmaps[i],
572 task_queue, _task_queues);
573
557 _accum_task_vtime[i] = 0.0; 574 _accum_task_vtime[i] = 0.0;
558 } 575 }
576
577 // Calculate the card number for the bottom of the heap. Used
578 // in biasing indexes into the accounting card bitmaps.
579 _heap_bottom_card_num =
580 intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
581 CardTableModRefBS::card_shift);
582
583 // Clear all the liveness counting data
584 clear_all_count_data();
559 585
560 if (ConcGCThreads > ParallelGCThreads) { 586 if (ConcGCThreads > ParallelGCThreads) {
561 vm_exit_during_initialization("Can't have more ConcGCThreads " 587 vm_exit_during_initialization("Can't have more ConcGCThreads "
562 "than ParallelGCThreads."); 588 "than ParallelGCThreads.");
563 } 589 }
772 // them as guarantees at the beginning / end of the bitmap 798 // them as guarantees at the beginning / end of the bitmap
773 // clearing to get some checking in the product. 799 // clearing to get some checking in the product.
774 assert(cmThread()->during_cycle(), "invariant"); 800 assert(cmThread()->during_cycle(), "invariant");
775 assert(!g1h->mark_in_progress(), "invariant"); 801 assert(!g1h->mark_in_progress(), "invariant");
776 } 802 }
803
804 // Clear the liveness counting data
805 clear_all_count_data();
777 806
778 // Repeat the asserts from above. 807 // Repeat the asserts from above.
779 guarantee(cmThread()->during_cycle(), "invariant"); 808 guarantee(cmThread()->during_cycle(), "invariant");
780 guarantee(!g1h->mark_in_progress(), "invariant"); 809 guarantee(!g1h->mark_in_progress(), "invariant");
781 } 810 }
1204 clear_has_overflown(); 1233 clear_has_overflown();
1205 if (G1TraceMarkStackOverflow) { 1234 if (G1TraceMarkStackOverflow) {
1206 gclog_or_tty->print_cr("\nRemark led to restart for overflow."); 1235 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1207 } 1236 }
1208 } else { 1237 } else {
1238 // Aggregate the per-task counting data that we have accumulated
1239 // while marking.
1240 aggregate_count_data();
1241
1209 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1242 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1210 // We're done with marking. 1243 // We're done with marking.
1211 // This is the end of the marking cycle, we're expected all 1244 // This is the end of the marking cycle, we're expected all
1212 // threads to have SATB queues with active set to true. 1245 // threads to have SATB queues with active set to true.
1213 satb_mq_set.set_active_all_threads(false, /* new active value */ 1246 satb_mq_set.set_active_all_threads(false, /* new active value */
1241 _remark_times.add((now - start) * 1000.0); 1274 _remark_times.add((now - start) * 1000.0);
1242 1275
1243 g1p->record_concurrent_mark_remark_end(); 1276 g1p->record_concurrent_mark_remark_end();
1244 } 1277 }
1245 1278
1246 #define CARD_BM_TEST_MODE 0 1279 // Used to calculate the # live objects per region
1247 1280 // for verification purposes
1248 class CalcLiveObjectsClosure: public HeapRegionClosure { 1281 class CalcLiveObjectsClosure: public HeapRegionClosure {
1249 1282
1250 CMBitMapRO* _bm; 1283 CMBitMapRO* _bm;
1251 ConcurrentMark* _cm; 1284 ConcurrentMark* _cm;
1252 bool _changed; 1285 BitMap* _region_bm;
1253 bool _yield; 1286 BitMap* _card_bm;
1254 size_t _words_done; 1287
1288 // Debugging
1289 size_t _tot_words_done;
1255 size_t _tot_live; 1290 size_t _tot_live;
1256 size_t _tot_used; 1291 size_t _tot_used;
1257 size_t _regions_done; 1292
1258 double _start_vtime_sec; 1293 size_t _region_marked_bytes;
1259 1294
1260 BitMap* _region_bm;
1261 BitMap* _card_bm;
1262 intptr_t _bottom_card_num; 1295 intptr_t _bottom_card_num;
1263 bool _final;
1264 1296
1265 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) { 1297 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1266 for (intptr_t i = start_card_num; i <= last_card_num; i++) { 1298 assert(start_card_num <= last_card_num, "sanity");
1267 #if CARD_BM_TEST_MODE 1299 BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1268 guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set."); 1300 BitMap::idx_t last_idx = last_card_num - _bottom_card_num;
1269 #else 1301
1270 _card_bm->par_at_put(i - _bottom_card_num, 1); 1302 for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
1271 #endif 1303 _card_bm->par_at_put(i, 1);
1272 } 1304 }
1273 } 1305 }
1274 1306
1275 public: 1307 public:
1276 CalcLiveObjectsClosure(bool final, 1308 CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
1277 CMBitMapRO *bm, ConcurrentMark *cm,
1278 BitMap* region_bm, BitMap* card_bm) : 1309 BitMap* region_bm, BitMap* card_bm) :
1279 _bm(bm), _cm(cm), _changed(false), _yield(true), 1310 _bm(bm), _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
1280 _words_done(0), _tot_live(0), _tot_used(0), 1311 _region_marked_bytes(0), _tot_words_done(0),
1281 _region_bm(region_bm), _card_bm(card_bm),_final(final), 1312 _tot_live(0), _tot_used(0),
1282 _regions_done(0), _start_vtime_sec(0.0) 1313 _bottom_card_num(cm->heap_bottom_card_num()) { }
1283 {
1284 _bottom_card_num =
1285 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1286 CardTableModRefBS::card_shift);
1287 }
1288 1314
1289 // It takes a region that's not empty (i.e., it has at least one 1315 // It takes a region that's not empty (i.e., it has at least one
1290 // live object in it and sets its corresponding bit on the region 1316 // live object in it and sets its corresponding bit on the region
1291 // bitmap to 1. If the region is "starts humongous" it will also set 1317 // bitmap to 1. If the region is "starts humongous" it will also set
1292 // to 1 the bits on the region bitmap that correspond to its 1318 // to 1 the bits on the region bitmap that correspond to its
1298 if (!hr->startsHumongous()) { 1324 if (!hr->startsHumongous()) {
1299 // Normal (non-humongous) case: just set the bit. 1325 // Normal (non-humongous) case: just set the bit.
1300 _region_bm->par_at_put((BitMap::idx_t) index, true); 1326 _region_bm->par_at_put((BitMap::idx_t) index, true);
1301 } else { 1327 } else {
1302 // Starts humongous case: calculate how many regions are part of 1328 // Starts humongous case: calculate how many regions are part of
1303 // this humongous region and then set the bit range. It might 1329 // this humongous region and then set the bit range.
1304 // have been a bit more efficient to look at the object that
1305 // spans these humongous regions to calculate their number from
1306 // the object's size. However, it's a good idea to calculate
1307 // this based on the metadata itself, and not the region
1308 // contents, so that this code is not aware of what goes into
1309 // the humongous regions (in case this changes in the future).
1310 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1330 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1311 size_t end_index = index + 1; 1331 HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
1312 while (end_index < g1h->n_regions()) { 1332 size_t end_index = last_hr->hrs_index() + 1;
1313 HeapRegion* chr = g1h->region_at(end_index);
1314 if (!chr->continuesHumongous()) break;
1315 end_index += 1;
1316 }
1317 _region_bm->par_at_put_range((BitMap::idx_t) index, 1333 _region_bm->par_at_put_range((BitMap::idx_t) index,
1318 (BitMap::idx_t) end_index, true); 1334 (BitMap::idx_t) end_index, true);
1319 } 1335 }
1320 } 1336 }
1321 1337
1322 bool doHeapRegion(HeapRegion* hr) { 1338 bool doHeapRegion(HeapRegion* hr) {
1323 if (!_final && _regions_done == 0) {
1324 _start_vtime_sec = os::elapsedVTime();
1325 }
1326 1339
1327 if (hr->continuesHumongous()) { 1340 if (hr->continuesHumongous()) {
1328 // We will ignore these here and process them when their 1341 // We will ignore these here and process them when their
1329 // associated "starts humongous" region is processed (see 1342 // associated "starts humongous" region is processed (see
1330 // set_bit_for_heap_region()). Note that we cannot rely on their 1343 // set_bit_for_heap_region()). Note that we cannot rely on their
1334 // before its associated "starts humongous". 1347 // before its associated "starts humongous".
1335 return false; 1348 return false;
1336 } 1349 }
1337 1350
1338 HeapWord* nextTop = hr->next_top_at_mark_start(); 1351 HeapWord* nextTop = hr->next_top_at_mark_start();
1339 HeapWord* start = hr->top_at_conc_mark_count(); 1352 HeapWord* start = hr->bottom();
1340 assert(hr->bottom() <= start && start <= hr->end() && 1353
1341 hr->bottom() <= nextTop && nextTop <= hr->end() && 1354 assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
1342 start <= nextTop, 1355 err_msg("Preconditions not met - "
1343 "Preconditions."); 1356 "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,
1344 // Otherwise, record the number of word's we'll examine. 1357 start, nextTop, hr->end()));
1358
1359 // Record the number of word's we'll examine.
1345 size_t words_done = (nextTop - start); 1360 size_t words_done = (nextTop - start);
1361
1346 // Find the first marked object at or after "start". 1362 // Find the first marked object at or after "start".
1347 start = _bm->getNextMarkedWordAddress(start, nextTop); 1363 start = _bm->getNextMarkedWordAddress(start, nextTop);
1364
1348 size_t marked_bytes = 0; 1365 size_t marked_bytes = 0;
1349 1366
1350 // Below, the term "card num" means the result of shifting an address 1367 // Below, the term "card num" means the result of shifting an address
1351 // by the card shift -- address 0 corresponds to card number 0. One 1368 // by the card shift -- address 0 corresponds to card number 0. One
1352 // must subtract the card num of the bottom of the heap to obtain a 1369 // must subtract the card num of the bottom of the heap to obtain a
1353 // card table index. 1370 // card table index.
1371
1354 // The first card num of the sequence of live cards currently being 1372 // The first card num of the sequence of live cards currently being
1355 // constructed. -1 ==> no sequence. 1373 // constructed. -1 ==> no sequence.
1356 intptr_t start_card_num = -1; 1374 intptr_t start_card_num = -1;
1375
1357 // The last card num of the sequence of live cards currently being 1376 // The last card num of the sequence of live cards currently being
1358 // constructed. -1 ==> no sequence. 1377 // constructed. -1 ==> no sequence.
1359 intptr_t last_card_num = -1; 1378 intptr_t last_card_num = -1;
1360 1379
1361 while (start < nextTop) { 1380 while (start < nextTop) {
1362 if (_yield && _cm->do_yield_check()) {
1363 // We yielded. It might be for a full collection, in which case
1364 // all bets are off; terminate the traversal.
1365 if (_cm->has_aborted()) {
1366 _changed = false;
1367 return true;
1368 } else {
1369 // Otherwise, it might be a collection pause, and the region
1370 // we're looking at might be in the collection set. We'll
1371 // abandon this region.
1372 return false;
1373 }
1374 }
1375 oop obj = oop(start); 1381 oop obj = oop(start);
1376 int obj_sz = obj->size(); 1382 int obj_sz = obj->size();
1383
1377 // The card num of the start of the current object. 1384 // The card num of the start of the current object.
1378 intptr_t obj_card_num = 1385 intptr_t obj_card_num =
1379 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); 1386 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1380
1381 HeapWord* obj_last = start + obj_sz - 1; 1387 HeapWord* obj_last = start + obj_sz - 1;
1382 intptr_t obj_last_card_num = 1388 intptr_t obj_last_card_num =
1383 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift); 1389 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1384 1390
1385 if (obj_card_num != last_card_num) { 1391 if (obj_card_num != last_card_num) {
1393 // Mark the last run, and start a new one. 1399 // Mark the last run, and start a new one.
1394 mark_card_num_range(start_card_num, last_card_num); 1400 mark_card_num_range(start_card_num, last_card_num);
1395 start_card_num = obj_card_num; 1401 start_card_num = obj_card_num;
1396 } 1402 }
1397 } 1403 }
1398 #if CARD_BM_TEST_MODE
1399 /*
1400 gclog_or_tty->print_cr("Setting bits from %d/%d.",
1401 obj_card_num - _bottom_card_num,
1402 obj_last_card_num - _bottom_card_num);
1403 */
1404 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1405 _card_bm->par_at_put(j - _bottom_card_num, 1);
1406 }
1407 #endif
1408 } 1404 }
1409 // In any case, we set the last card num. 1405 // In any case, we set the last card num.
1410 last_card_num = obj_last_card_num; 1406 last_card_num = obj_last_card_num;
1411 1407
1412 marked_bytes += (size_t)obj_sz * HeapWordSize; 1408 marked_bytes += (size_t)obj_sz * HeapWordSize;
1409
1413 // Find the next marked object after this one. 1410 // Find the next marked object after this one.
1414 start = _bm->getNextMarkedWordAddress(start + 1, nextTop); 1411 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1415 _changed = true; 1412 }
1416 } 1413
1417 // Handle the last range, if any. 1414 // Handle the last range, if any.
1418 if (start_card_num != -1) { 1415 if (start_card_num != -1) {
1419 mark_card_num_range(start_card_num, last_card_num); 1416 mark_card_num_range(start_card_num, last_card_num);
1420 } 1417 }
1421 if (_final) { 1418
1422 // Mark the allocated-since-marking portion... 1419 // Mark the allocated-since-marking portion...
1423 HeapWord* tp = hr->top(); 1420 HeapWord* top = hr->top();
1424 if (nextTop < tp) { 1421 if (nextTop < top) {
1425 start_card_num = 1422 start_card_num = intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1426 intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift); 1423 last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);
1427 last_card_num = 1424
1428 intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift); 1425 mark_card_num_range(start_card_num, last_card_num);
1429 mark_card_num_range(start_card_num, last_card_num); 1426
1430 // This definitely means the region has live objects. 1427 // This definitely means the region has live objects.
1431 set_bit_for_region(hr); 1428 set_bit_for_region(hr);
1432 } 1429 }
1433 } 1430
1434
1435 hr->add_to_marked_bytes(marked_bytes);
1436 // Update the live region bitmap. 1431 // Update the live region bitmap.
1437 if (marked_bytes > 0) { 1432 if (marked_bytes > 0) {
1438 set_bit_for_region(hr); 1433 set_bit_for_region(hr);
1439 } 1434 }
1440 hr->set_top_at_conc_mark_count(nextTop); 1435
1436 // Set the marked bytes for the current region so that
1437 // it can be queried by a calling verificiation routine
1438 _region_marked_bytes = marked_bytes;
1439
1441 _tot_live += hr->next_live_bytes(); 1440 _tot_live += hr->next_live_bytes();
1442 _tot_used += hr->used(); 1441 _tot_used += hr->used();
1443 _words_done = words_done; 1442 _tot_words_done = words_done;
1444 1443
1445 if (!_final) { 1444 return false;
1446 ++_regions_done; 1445 }
1447 if (_regions_done % 10 == 0) { 1446
1448 double end_vtime_sec = os::elapsedVTime(); 1447 size_t region_marked_bytes() const { return _region_marked_bytes; }
1449 double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec; 1448
1450 if (elapsed_vtime_sec > (10.0 / 1000.0)) { 1449 // Debugging
1451 jlong sleep_time_ms = 1450 size_t tot_words_done() const { return _tot_words_done; }
1452 (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0); 1451 size_t tot_live() const { return _tot_live; }
1453 os::sleep(Thread::current(), sleep_time_ms, false); 1452 size_t tot_used() const { return _tot_used; }
1454 _start_vtime_sec = end_vtime_sec; 1453 };
1454
1455 // Heap region closure used for verifying the counting data
1456 // that was accumulated concurrently and aggregated during
1457 // the remark pause. This closure is applied to the heap
1458 // regions during the STW cleanup pause.
1459
1460 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1461 ConcurrentMark* _cm;
1462 CalcLiveObjectsClosure _calc_cl;
1463 BitMap* _region_bm; // Region BM to be verified
1464 BitMap* _card_bm; // Card BM to be verified
1465 bool _verbose; // verbose output?
1466
1467 BitMap* _exp_region_bm; // Expected Region BM values
1468 BitMap* _exp_card_bm; // Expected card BM values
1469
1470 int _failures;
1471
1472 public:
1473 VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
1474 BitMap* region_bm,
1475 BitMap* card_bm,
1476 BitMap* exp_region_bm,
1477 BitMap* exp_card_bm,
1478 bool verbose) :
1479 _cm(cm),
1480 _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
1481 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1482 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1483 _failures(0) { }
1484
1485 int failures() const { return _failures; }
1486
1487 bool doHeapRegion(HeapRegion* hr) {
1488 if (hr->continuesHumongous()) {
1489 // We will ignore these here and process them when their
1490 // associated "starts humongous" region is processed (see
1491 // set_bit_for_heap_region()). Note that we cannot rely on their
1492 // associated "starts humongous" region to have their bit set to
1493 // 1 since, due to the region chunking in the parallel region
1494 // iteration, a "continues humongous" region might be visited
1495 // before its associated "starts humongous".
1496 return false;
1497 }
1498
1499 int failures = 0;
1500
1501 // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1502 // this region and set the corresponding bits in the expected region
1503 // and card bitmaps.
1504 bool res = _calc_cl.doHeapRegion(hr);
1505 assert(res == false, "should be continuing");
1506
1507 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1508 Mutex::_no_safepoint_check_flag);
1509
1510 // Verify that _top_at_conc_count == ntams
1511 if (hr->top_at_conc_mark_count() != hr->next_top_at_mark_start()) {
1512 if (_verbose) {
1513 gclog_or_tty->print_cr("Region " SIZE_FORMAT ": top at conc count incorrect: "
1514 "expected " PTR_FORMAT ", actual: " PTR_FORMAT,
1515 hr->hrs_index(), hr->next_top_at_mark_start(),
1516 hr->top_at_conc_mark_count());
1517 }
1518 failures += 1;
1519 }
1520
1521 // Verify the marked bytes for this region.
1522 size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1523 size_t act_marked_bytes = hr->next_marked_bytes();
1524
1525 // We're not OK if expected marked bytes > actual marked bytes. It means
1526 // we have missed accounting some objects during the actual marking.
1527 if (exp_marked_bytes > act_marked_bytes) {
1528 if (_verbose) {
1529 gclog_or_tty->print_cr("Region " SIZE_FORMAT ": marked bytes mismatch: "
1530 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1531 hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1532 }
1533 failures += 1;
1534 }
1535
1536 // Verify the bit, for this region, in the actual and expected
1537 // (which was just calculated) region bit maps.
1538 // We're not OK if the bit in the calculated expected region
1539 // bitmap is set and the bit in the actual region bitmap is not.
1540 BitMap::idx_t index = (BitMap::idx_t)hr->hrs_index();
1541
1542 bool expected = _exp_region_bm->at(index);
1543 bool actual = _region_bm->at(index);
1544 if (expected && !actual) {
1545 if (_verbose) {
1546 gclog_or_tty->print_cr("Region " SIZE_FORMAT ": region bitmap mismatch: "
1547 "expected: %d, actual: %d",
1548 hr->hrs_index(), expected, actual);
1549 }
1550 failures += 1;
1551 }
1552
1553 // Verify that the card bit maps for the cards spanned by the current
1554 // region match. We have an error if we have a set bit in the expected
1555 // bit map and the corresponding bit in the actual bitmap is not set.
1556
1557 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1558 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1559
1560 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1561 expected = _exp_card_bm->at(i);
1562 actual = _card_bm->at(i);
1563
1564 if (expected && !actual) {
1565 if (_verbose) {
1566 gclog_or_tty->print_cr("Region " SIZE_FORMAT ": card bitmap mismatch at " SIZE_FORMAT ": "
1567 "expected: %d, actual: %d",
1568 hr->hrs_index(), i, expected, actual);
1455 } 1569 }
1570 failures += 1;
1456 } 1571 }
1457 } 1572 }
1458 1573
1574 if (failures > 0 && _verbose) {
1575 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1576 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1577 HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1578 _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1579 }
1580
1581 _failures += failures;
1582
1583 // We could stop iteration over the heap when we
1584 // find the first voilating region by returning true.
1459 return false; 1585 return false;
1460 } 1586 }
1461
1462 bool changed() { return _changed; }
1463 void reset() { _changed = false; _words_done = 0; }
1464 void no_yield() { _yield = false; }
1465 size_t words_done() { return _words_done; }
1466 size_t tot_live() { return _tot_live; }
1467 size_t tot_used() { return _tot_used; }
1468 }; 1587 };
1469 1588
1470 1589
1471 void ConcurrentMark::calcDesiredRegions() { 1590 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1472 _region_bm.clear(); 1591 protected:
1473 _card_bm.clear(); 1592 G1CollectedHeap* _g1h;
1474 CalcLiveObjectsClosure calccl(false /*final*/, 1593 ConcurrentMark* _cm;
1475 nextMarkBitMap(), this, 1594 BitMap* _actual_region_bm;
1476 &_region_bm, &_card_bm); 1595 BitMap* _actual_card_bm;
1477 G1CollectedHeap *g1h = G1CollectedHeap::heap(); 1596
1478 g1h->heap_region_iterate(&calccl); 1597 uint _n_workers;
1479 1598
1480 do { 1599 BitMap* _expected_region_bm;
1481 calccl.reset(); 1600 BitMap* _expected_card_bm;
1482 g1h->heap_region_iterate(&calccl); 1601
1483 } while (calccl.changed()); 1602 int _failures;
1484 } 1603 bool _verbose;
1604
1605 public:
1606 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1607 BitMap* region_bm, BitMap* card_bm,
1608 BitMap* expected_region_bm, BitMap* expected_card_bm)
1609 : AbstractGangTask("G1 verify final counting"),
1610 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1611 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1612 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1613 _failures(0), _verbose(false),
1614 _n_workers(0) {
1615 assert(VerifyDuringGC, "don't call this otherwise");
1616
1617 // Use the value already set as the number of active threads
1618 // in the call to run_task().
1619 if (G1CollectedHeap::use_parallel_gc_threads()) {
1620 assert( _g1h->workers()->active_workers() > 0,
1621 "Should have been previously set");
1622 _n_workers = _g1h->workers()->active_workers();
1623 } else {
1624 _n_workers = 1;
1625 }
1626
1627 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1628 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1629
1630 _verbose = _cm->verbose_medium();
1631 }
1632
1633 void work(uint worker_id) {
1634 assert(worker_id < _n_workers, "invariant");
1635
1636 VerifyLiveObjectDataHRClosure verify_cl(_cm,
1637 _actual_region_bm, _actual_card_bm,
1638 _expected_region_bm,
1639 _expected_card_bm,
1640 _verbose);
1641
1642 if (G1CollectedHeap::use_parallel_gc_threads()) {
1643 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1644 worker_id,
1645 _n_workers,
1646 HeapRegion::VerifyCountClaimValue);
1647 } else {
1648 _g1h->heap_region_iterate(&verify_cl);
1649 }
1650
1651 Atomic::add(verify_cl.failures(), &_failures);
1652 }
1653
1654 int failures() const { return _failures; }
1655 };
1656
1657 // Final update of count data (during cleanup).
1658 // Adds [top_at_count, NTAMS) to the marked bytes for each
1659 // region. Sets the bits in the card bitmap corresponding
1660 // to the interval [top_at_count, top], and sets the
1661 // liveness bit for each region containing live data
1662 // in the region bitmap.
1663
1664 class FinalCountDataUpdateClosure: public HeapRegionClosure {
1665 ConcurrentMark* _cm;
1666 BitMap* _region_bm;
1667 BitMap* _card_bm;
1668
1669 size_t _total_live_bytes;
1670 size_t _total_used_bytes;
1671 size_t _total_words_done;
1672
1673 void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {
1674 assert(start_idx <= last_idx, "sanity");
1675
1676 // Set the inclusive bit range [start_idx, last_idx].
1677 // For small ranges (up to 8 cards) use a simple loop; otherwise
1678 // use par_at_put_range.
1679 if ((last_idx - start_idx) <= 8) {
1680 for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
1681 _card_bm->par_set_bit(i);
1682 }
1683 } else {
1684 assert(last_idx < _card_bm->size(), "sanity");
1685 // Note BitMap::par_at_put_range() is exclusive.
1686 _card_bm->par_at_put_range(start_idx, last_idx+1, true);
1687 }
1688 }
1689
1690 // It takes a region that's not empty (i.e., it has at least one
1691 // live object in it and sets its corresponding bit on the region
1692 // bitmap to 1. If the region is "starts humongous" it will also set
1693 // to 1 the bits on the region bitmap that correspond to its
1694 // associated "continues humongous" regions.
1695 void set_bit_for_region(HeapRegion* hr) {
1696 assert(!hr->continuesHumongous(), "should have filtered those out");
1697
1698 size_t index = hr->hrs_index();
1699 if (!hr->startsHumongous()) {
1700 // Normal (non-humongous) case: just set the bit.
1701 _region_bm->par_set_bit((BitMap::idx_t) index);
1702 } else {
1703 // Starts humongous case: calculate how many regions are part of
1704 // this humongous region and then set the bit range.
1705 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1706 HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
1707 size_t end_index = last_hr->hrs_index() + 1;
1708 _region_bm->par_at_put_range((BitMap::idx_t) index,
1709 (BitMap::idx_t) end_index, true);
1710 }
1711 }
1712
1713 public:
1714 FinalCountDataUpdateClosure(ConcurrentMark* cm,
1715 BitMap* region_bm,
1716 BitMap* card_bm) :
1717 _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
1718 _total_words_done(0), _total_live_bytes(0), _total_used_bytes(0) { }
1719
1720 bool doHeapRegion(HeapRegion* hr) {
1721
1722 if (hr->continuesHumongous()) {
1723 // We will ignore these here and process them when their
1724 // associated "starts humongous" region is processed (see
1725 // set_bit_for_heap_region()). Note that we cannot rely on their
1726 // associated "starts humongous" region to have their bit set to
1727 // 1 since, due to the region chunking in the parallel region
1728 // iteration, a "continues humongous" region might be visited
1729 // before its associated "starts humongous".
1730 return false;
1731 }
1732
1733 HeapWord* start = hr->top_at_conc_mark_count();
1734 HeapWord* ntams = hr->next_top_at_mark_start();
1735 HeapWord* top = hr->top();
1736
1737 assert(hr->bottom() <= start && start <= hr->end() &&
1738 hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1739
1740 size_t words_done = ntams - hr->bottom();
1741
1742 if (start < ntams) {
1743 // Region was changed between remark and cleanup pauses
1744 // We need to add (ntams - start) to the marked bytes
1745 // for this region, and set bits for the range
1746 // [ card_idx(start), card_idx(ntams) ) in the card bitmap.
1747 size_t live_bytes = (ntams - start) * HeapWordSize;
1748 hr->add_to_marked_bytes(live_bytes);
1749
1750 // Record the new top at conc count
1751 hr->set_top_at_conc_mark_count(ntams);
1752
1753 // The setting of the bits in the card bitmap takes place below
1754 }
1755
1756 // Mark the allocated-since-marking portion...
1757 if (ntams < top) {
1758 // This definitely means the region has live objects.
1759 set_bit_for_region(hr);
1760 }
1761
1762 // Now set the bits for [start, top]
1763 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1764 BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top);
1765 set_card_bitmap_range(start_idx, last_idx);
1766
1767 // Set the bit for the region if it contains live data
1768 if (hr->next_marked_bytes() > 0) {
1769 set_bit_for_region(hr);
1770 }
1771
1772 _total_words_done += words_done;
1773 _total_used_bytes += hr->used();
1774 _total_live_bytes += hr->next_marked_bytes();
1775
1776 return false;
1777 }
1778
1779 size_t total_words_done() const { return _total_words_done; }
1780 size_t total_live_bytes() const { return _total_live_bytes; }
1781 size_t total_used_bytes() const { return _total_used_bytes; }
1782 };
1485 1783
1486 class G1ParFinalCountTask: public AbstractGangTask { 1784 class G1ParFinalCountTask: public AbstractGangTask {
1487 protected: 1785 protected:
1488 G1CollectedHeap* _g1h; 1786 G1CollectedHeap* _g1h;
1489 CMBitMap* _bm; 1787 ConcurrentMark* _cm;
1788 BitMap* _actual_region_bm;
1789 BitMap* _actual_card_bm;
1790
1490 uint _n_workers; 1791 uint _n_workers;
1792
1491 size_t *_live_bytes; 1793 size_t *_live_bytes;
1492 size_t *_used_bytes; 1794 size_t *_used_bytes;
1493 BitMap* _region_bm; 1795
1494 BitMap* _card_bm;
1495 public: 1796 public:
1496 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm, 1797 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1497 BitMap* region_bm, BitMap* card_bm) 1798 : AbstractGangTask("G1 final counting"),
1498 : AbstractGangTask("G1 final counting"), _g1h(g1h), 1799 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1499 _bm(bm), _region_bm(region_bm), _card_bm(card_bm), 1800 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1500 _n_workers(0) 1801 _n_workers(0) {
1501 {
1502 // Use the value already set as the number of active threads 1802 // Use the value already set as the number of active threads
1503 // in the call to run_task(). Needed for the allocation of 1803 // in the call to run_task(). Needed for the allocation of
1504 // _live_bytes and _used_bytes. 1804 // _live_bytes and _used_bytes.
1505 if (G1CollectedHeap::use_parallel_gc_threads()) { 1805 if (G1CollectedHeap::use_parallel_gc_threads()) {
1506 assert( _g1h->workers()->active_workers() > 0, 1806 assert( _g1h->workers()->active_workers() > 0,
1518 FREE_C_HEAP_ARRAY(size_t, _live_bytes); 1818 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1519 FREE_C_HEAP_ARRAY(size_t, _used_bytes); 1819 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1520 } 1820 }
1521 1821
1522 void work(uint worker_id) { 1822 void work(uint worker_id) {
1523 CalcLiveObjectsClosure calccl(true /*final*/, 1823 assert(worker_id < _n_workers, "invariant");
1524 _bm, _g1h->concurrent_mark(), 1824
1525 _region_bm, _card_bm); 1825 FinalCountDataUpdateClosure final_update_cl(_cm,
1526 calccl.no_yield(); 1826 _actual_region_bm,
1827 _actual_card_bm);
1828
1527 if (G1CollectedHeap::use_parallel_gc_threads()) { 1829 if (G1CollectedHeap::use_parallel_gc_threads()) {
1528 _g1h->heap_region_par_iterate_chunked(&calccl, worker_id, 1830 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1529 (int) _n_workers, 1831 worker_id,
1832 _n_workers,
1530 HeapRegion::FinalCountClaimValue); 1833 HeapRegion::FinalCountClaimValue);
1531 } else { 1834 } else {
1532 _g1h->heap_region_iterate(&calccl); 1835 _g1h->heap_region_iterate(&final_update_cl);
1533 } 1836 }
1534 assert(calccl.complete(), "Shouldn't have yielded!"); 1837
1535 1838 _live_bytes[worker_id] = final_update_cl.total_live_bytes();
1536 assert(worker_id < _n_workers, "invariant"); 1839 _used_bytes[worker_id] = final_update_cl.total_used_bytes();
1537 _live_bytes[worker_id] = calccl.tot_live(); 1840 }
1538 _used_bytes[worker_id] = calccl.tot_used(); 1841
1539 }
1540 size_t live_bytes() { 1842 size_t live_bytes() {
1541 size_t live_bytes = 0; 1843 size_t live_bytes = 0;
1542 for (uint i = 0; i < _n_workers; ++i) 1844 for (uint i = 0; i < _n_workers; ++i)
1543 live_bytes += _live_bytes[i]; 1845 live_bytes += _live_bytes[i];
1544 return live_bytes; 1846 return live_bytes;
1545 } 1847 }
1848
1546 size_t used_bytes() { 1849 size_t used_bytes() {
1547 size_t used_bytes = 0; 1850 size_t used_bytes = 0;
1548 for (uint i = 0; i < _n_workers; ++i) 1851 for (uint i = 0; i < _n_workers; ++i)
1549 used_bytes += _used_bytes[i]; 1852 used_bytes += _used_bytes[i];
1550 return used_bytes; 1853 return used_bytes;
1703 BitMap* _card_bm; 2006 BitMap* _card_bm;
1704 public: 2007 public:
1705 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 2008 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1706 BitMap* region_bm, BitMap* card_bm) : 2009 BitMap* region_bm, BitMap* card_bm) :
1707 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 2010 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1708 _region_bm(region_bm), _card_bm(card_bm) 2011 _region_bm(region_bm), _card_bm(card_bm) { }
1709 {}
1710 2012
1711 void work(uint worker_id) { 2013 void work(uint worker_id) {
1712 if (G1CollectedHeap::use_parallel_gc_threads()) { 2014 if (G1CollectedHeap::use_parallel_gc_threads()) {
1713 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 2015 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1714 HeapRegion::ScrubRemSetClaimValue); 2016 HeapRegion::ScrubRemSetClaimValue);
1751 HeapRegionRemSet::reset_for_cleanup_tasks(); 2053 HeapRegionRemSet::reset_for_cleanup_tasks();
1752 2054
1753 uint n_workers; 2055 uint n_workers;
1754 2056
1755 // Do counting once more with the world stopped for good measure. 2057 // Do counting once more with the world stopped for good measure.
1756 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), 2058 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1757 &_region_bm, &_card_bm); 2059
1758 if (G1CollectedHeap::use_parallel_gc_threads()) { 2060 if (G1CollectedHeap::use_parallel_gc_threads()) {
1759 assert(g1h->check_heap_region_claim_values( 2061 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1760 HeapRegion::InitialClaimValue),
1761 "sanity check"); 2062 "sanity check");
1762 2063
1763 g1h->set_par_threads(); 2064 g1h->set_par_threads();
1764 n_workers = g1h->n_par_threads(); 2065 n_workers = g1h->n_par_threads();
1765 assert(g1h->n_par_threads() == n_workers, 2066 assert(g1h->n_par_threads() == n_workers,
1766 "Should not have been reset"); 2067 "Should not have been reset");
1767 g1h->workers()->run_task(&g1_par_count_task); 2068 g1h->workers()->run_task(&g1_par_count_task);
1768 // Done with the parallel phase so reset to 0. 2069 // Done with the parallel phase so reset to 0.
1769 g1h->set_par_threads(0); 2070 g1h->set_par_threads(0);
1770 2071
1771 assert(g1h->check_heap_region_claim_values( 2072 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1772 HeapRegion::FinalCountClaimValue),
1773 "sanity check"); 2073 "sanity check");
1774 } else { 2074 } else {
1775 n_workers = 1; 2075 n_workers = 1;
1776 g1_par_count_task.work(0); 2076 g1_par_count_task.work(0);
2077 }
2078
2079 if (VerifyDuringGC) {
2080 // Verify that the counting data accumulated during marking matches
2081 // that calculated by walking the marking bitmap.
2082
2083 // Bitmaps to hold expected values
2084 BitMap expected_region_bm(_region_bm.size(), false);
2085 BitMap expected_card_bm(_card_bm.size(), false);
2086
2087 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2088 &_region_bm,
2089 &_card_bm,
2090 &expected_region_bm,
2091 &expected_card_bm);
2092
2093 if (G1CollectedHeap::use_parallel_gc_threads()) {
2094 g1h->set_par_threads((int)n_workers);
2095 g1h->workers()->run_task(&g1_par_verify_task);
2096 // Done with the parallel phase so reset to 0.
2097 g1h->set_par_threads(0);
2098
2099 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2100 "sanity check");
2101 } else {
2102 g1_par_verify_task.work(0);
2103 }
2104
2105 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1777 } 2106 }
1778 2107
1779 size_t known_garbage_bytes = 2108 size_t known_garbage_bytes =
1780 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes(); 2109 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1781 g1p->set_known_garbage_bytes(known_garbage_bytes); 2110 g1p->set_known_garbage_bytes(known_garbage_bytes);
1966 } 2295 }
1967 2296
1968 class G1CMKeepAliveClosure: public OopClosure { 2297 class G1CMKeepAliveClosure: public OopClosure {
1969 G1CollectedHeap* _g1; 2298 G1CollectedHeap* _g1;
1970 ConcurrentMark* _cm; 2299 ConcurrentMark* _cm;
1971 CMBitMap* _bitMap;
1972 public: 2300 public:
1973 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm, 2301 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
1974 CMBitMap* bitMap) : 2302 _g1(g1), _cm(cm) {
1975 _g1(g1), _cm(cm), 2303 assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
1976 _bitMap(bitMap) {} 2304 }
1977 2305
1978 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2306 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1979 virtual void do_oop( oop* p) { do_oop_work(p); } 2307 virtual void do_oop( oop* p) { do_oop_work(p); }
1980 2308
1981 template <class T> void do_oop_work(T* p) { 2309 template <class T> void do_oop_work(T* p) {
1987 "*"PTR_FORMAT" = "PTR_FORMAT, 2315 "*"PTR_FORMAT" = "PTR_FORMAT,
1988 p, (void*) obj); 2316 p, (void*) obj);
1989 } 2317 }
1990 2318
1991 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) { 2319 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
1992 _bitMap->mark(addr); 2320 _cm->mark_and_count(obj);
1993 _cm->mark_stack_push(obj); 2321 _cm->mark_stack_push(obj);
1994 } 2322 }
1995 } 2323 }
1996 }; 2324 };
1997 2325
1998 class G1CMDrainMarkingStackClosure: public VoidClosure { 2326 class G1CMDrainMarkingStackClosure: public VoidClosure {
2327 ConcurrentMark* _cm;
1999 CMMarkStack* _markStack; 2328 CMMarkStack* _markStack;
2000 CMBitMap* _bitMap;
2001 G1CMKeepAliveClosure* _oopClosure; 2329 G1CMKeepAliveClosure* _oopClosure;
2002 public: 2330 public:
2003 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack, 2331 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2004 G1CMKeepAliveClosure* oopClosure) : 2332 G1CMKeepAliveClosure* oopClosure) :
2005 _bitMap(bitMap), 2333 _cm(cm),
2006 _markStack(markStack), 2334 _markStack(markStack),
2007 _oopClosure(oopClosure) 2335 _oopClosure(oopClosure) { }
2008 {}
2009 2336
2010 void do_void() { 2337 void do_void() {
2011 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false); 2338 _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);
2012 } 2339 }
2013 }; 2340 };
2014 2341
2015 // 'Keep Alive' closure used by parallel reference processing. 2342 // 'Keep Alive' closure used by parallel reference processing.
2016 // An instance of this closure is used in the parallel reference processing 2343 // An instance of this closure is used in the parallel reference processing
2085 class G1CMParDrainMarkingStackClosure: public VoidClosure { 2412 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2086 ConcurrentMark* _cm; 2413 ConcurrentMark* _cm;
2087 CMTask* _task; 2414 CMTask* _task;
2088 public: 2415 public:
2089 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) : 2416 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2090 _cm(cm), _task(task) 2417 _cm(cm), _task(task) { }
2091 {}
2092 2418
2093 void do_void() { 2419 void do_void() {
2094 do { 2420 do {
2095 if (_cm->verbose_high()) { 2421 if (_cm->verbose_high()) {
2096 gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step", 2422 gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2225 2551
2226 // Process weak references. 2552 // Process weak references.
2227 rp->setup_policy(clear_all_soft_refs); 2553 rp->setup_policy(clear_all_soft_refs);
2228 assert(_markStack.isEmpty(), "mark stack should be empty"); 2554 assert(_markStack.isEmpty(), "mark stack should be empty");
2229 2555
2230 G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap()); 2556 G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2231 G1CMDrainMarkingStackClosure 2557 G1CMDrainMarkingStackClosure
2232 g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive); 2558 g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2233 2559
2234 // We use the work gang from the G1CollectedHeap and we utilize all 2560 // We use the work gang from the G1CollectedHeap and we utilize all
2235 // the worker threads. 2561 // the worker threads.
2236 uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U; 2562 uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
2237 active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U); 2563 active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
2598 // This note is for drainAllSATBBuffers and the code in between. 2924 // This note is for drainAllSATBBuffers and the code in between.
2599 // In the future we could reuse a task to do this work during an 2925 // In the future we could reuse a task to do this work during an
2600 // evacuation pause (since now tasks are not active and can be claimed 2926 // evacuation pause (since now tasks are not active and can be claimed
2601 // during an evacuation pause). This was a late change to the code and 2927 // during an evacuation pause). This was a late change to the code and
2602 // is currently not being taken advantage of. 2928 // is currently not being taken advantage of.
2603
2604 class CMGlobalObjectClosure : public ObjectClosure {
2605 private:
2606 ConcurrentMark* _cm;
2607
2608 public:
2609 void do_object(oop obj) {
2610 _cm->deal_with_reference(obj);
2611 }
2612
2613 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2614 };
2615 2929
2616 void ConcurrentMark::deal_with_reference(oop obj) { 2930 void ConcurrentMark::deal_with_reference(oop obj) {
2617 if (verbose_high()) { 2931 if (verbose_high()) {
2618 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT, 2932 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2619 (void*) obj); 2933 (void*) obj);
2655 } 2969 }
2656 } 2970 }
2657 } 2971 }
2658 } 2972 }
2659 2973
2974 class CMGlobalObjectClosure : public ObjectClosure {
2975 private:
2976 ConcurrentMark* _cm;
2977
2978 public:
2979 void do_object(oop obj) {
2980 _cm->deal_with_reference(obj);
2981 }
2982
2983 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2984 };
2985
2660 void ConcurrentMark::drainAllSATBBuffers() { 2986 void ConcurrentMark::drainAllSATBBuffers() {
2661 guarantee(false, "drainAllSATBBuffers(): don't call this any more"); 2987 guarantee(false, "drainAllSATBBuffers(): don't call this any more");
2662 2988
2663 CMGlobalObjectClosure oc(this); 2989 CMGlobalObjectClosure oc(this);
2664 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2990 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2674 // called during an evacuation pause 3000 // called during an evacuation pause
2675 satb_mq_set.iterate_closure_all_threads(); 3001 satb_mq_set.iterate_closure_all_threads();
2676 3002
2677 satb_mq_set.set_closure(NULL); 3003 satb_mq_set.set_closure(NULL);
2678 assert(satb_mq_set.completed_buffers_num() == 0, "invariant"); 3004 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2679 }
2680
2681 void ConcurrentMark::clear(oop p) {
2682 assert(p != NULL && p->is_oop(), "expected an oop");
2683 HeapWord* addr = (HeapWord*)p;
2684 assert(addr >= _nextMarkBitMap->startWord() ||
2685 addr < _nextMarkBitMap->endWord(), "in a region");
2686
2687 _nextMarkBitMap->clear(addr);
2688 } 3005 }
2689 3006
2690 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 3007 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2691 // Note we are overriding the read-only view of the prev map here, via 3008 // Note we are overriding the read-only view of the prev map here, via
2692 // the cast. 3009 // the cast.
2998 // Clear any partial regions from the CMTasks 3315 // Clear any partial regions from the CMTasks
2999 _tasks[i]->clear_aborted_region(); 3316 _tasks[i]->clear_aborted_region();
3000 } 3317 }
3001 } 3318 }
3002 3319
3320 // Aggregate the counting data that was constructed concurrently
3321 // with marking.
3322 class AggregateCountDataHRClosure: public HeapRegionClosure {
3323 ConcurrentMark* _cm;
3324 BitMap* _cm_card_bm;
3325 size_t _max_task_num;
3326
3327 public:
3328 AggregateCountDataHRClosure(ConcurrentMark *cm,
3329 BitMap* cm_card_bm,
3330 size_t max_task_num) :
3331 _cm(cm), _cm_card_bm(cm_card_bm),
3332 _max_task_num(max_task_num) { }
3333
3334 bool is_card_aligned(HeapWord* p) {
3335 return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);
3336 }
3337
3338 bool doHeapRegion(HeapRegion* hr) {
3339 if (hr->continuesHumongous()) {
3340 // We will ignore these here and process them when their
3341 // associated "starts humongous" region is processed.
3342 // Note that we cannot rely on their associated
3343 // "starts humongous" region to have their bit set to 1
3344 // since, due to the region chunking in the parallel region
3345 // iteration, a "continues humongous" region might be visited
3346 // before its associated "starts humongous".
3347 return false;
3348 }
3349
3350 HeapWord* start = hr->bottom();
3351 HeapWord* limit = hr->next_top_at_mark_start();
3352 HeapWord* end = hr->end();
3353
3354 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3355 err_msg("Preconditions not met - "
3356 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3357 "top: "PTR_FORMAT", end: "PTR_FORMAT,
3358 start, limit, hr->top(), hr->end()));
3359
3360 assert(hr->next_marked_bytes() == 0, "Precondition");
3361
3362 if (start == limit) {
3363 // NTAMS of this region has not been set so nothing to do.
3364 return false;
3365 }
3366
3367 assert(is_card_aligned(start), "sanity");
3368 assert(is_card_aligned(end), "sanity");
3369
3370 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3371 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3372 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3373
3374 // If ntams is not card aligned then we bump the index for
3375 // limit so that we get the card spanning ntams.
3376 if (!is_card_aligned(limit)) {
3377 limit_idx += 1;
3378 }
3379
3380 assert(limit_idx <= end_idx, "or else use atomics");
3381
3382 // Aggregate the "stripe" in the count data associated with hr.
3383 size_t hrs_index = hr->hrs_index();
3384 size_t marked_bytes = 0;
3385
3386 for (int i = 0; (size_t)i < _max_task_num; i += 1) {
3387 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3388 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3389
3390 // Fetch the marked_bytes in this region for task i and
3391 // add it to the running total for this region.
3392 marked_bytes += marked_bytes_array[hrs_index];
3393
3394 // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
3395 // into the global card bitmap.
3396 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3397
3398 while (scan_idx < limit_idx) {
3399 assert(task_card_bm->at(scan_idx) == true, "should be");
3400 _cm_card_bm->set_bit(scan_idx);
3401 assert(_cm_card_bm->at(scan_idx) == true, "should be");
3402
3403 // BitMap::get_next_one_offset() can handle the case when
3404 // its left_offset parameter is greater than its right_offset
3405 // parameter. If does, however, have an early exit if
3406 // left_offset == right_offset. So let's limit the value
3407 // passed in for left offset here.
3408 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3409 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3410 }
3411 }
3412
3413 // Update the marked bytes for this region.
3414 hr->add_to_marked_bytes(marked_bytes);
3415
3416 // Now set the top at count to NTAMS.
3417 hr->set_top_at_conc_mark_count(limit);
3418
3419 // Next heap region
3420 return false;
3421 }
3422 };
3423
3424 class G1AggregateCountDataTask: public AbstractGangTask {
3425 protected:
3426 G1CollectedHeap* _g1h;
3427 ConcurrentMark* _cm;
3428 BitMap* _cm_card_bm;
3429 size_t _max_task_num;
3430 int _active_workers;
3431
3432 public:
3433 G1AggregateCountDataTask(G1CollectedHeap* g1h,
3434 ConcurrentMark* cm,
3435 BitMap* cm_card_bm,
3436 size_t max_task_num,
3437 int n_workers) :
3438 AbstractGangTask("Count Aggregation"),
3439 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3440 _max_task_num(max_task_num),
3441 _active_workers(n_workers) { }
3442
3443 void work(uint worker_id) {
3444 AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num);
3445
3446 if (G1CollectedHeap::use_parallel_gc_threads()) {
3447 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3448 _active_workers,
3449 HeapRegion::AggregateCountClaimValue);
3450 } else {
3451 _g1h->heap_region_iterate(&cl);
3452 }
3453 }
3454 };
3455
3456
3457 void ConcurrentMark::aggregate_count_data() {
3458 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3459 _g1h->workers()->active_workers() :
3460 1);
3461
3462 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3463 _max_task_num, n_workers);
3464
3465 if (G1CollectedHeap::use_parallel_gc_threads()) {
3466 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3467 "sanity check");
3468 _g1h->set_par_threads(n_workers);
3469 _g1h->workers()->run_task(&g1_par_agg_task);
3470 _g1h->set_par_threads(0);
3471
3472 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3473 "sanity check");
3474 _g1h->reset_heap_region_claim_values();
3475 } else {
3476 g1_par_agg_task.work(0);
3477 }
3478 }
3479
3480 // Clear the per-worker arrays used to store the per-region counting data
3481 void ConcurrentMark::clear_all_count_data() {
3482 // Clear the global card bitmap - it will be filled during
3483 // liveness count aggregation (during remark) and the
3484 // final counting task.
3485 _card_bm.clear();
3486
3487 // Clear the global region bitmap - it will be filled as part
3488 // of the final counting task.
3489 _region_bm.clear();
3490
3491 size_t max_regions = _g1h->max_regions();
3492 assert(_max_task_num != 0, "unitialized");
3493
3494 for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3495 BitMap* task_card_bm = count_card_bitmap_for(i);
3496 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3497
3498 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3499 assert(marked_bytes_array != NULL, "uninitialized");
3500
3501 memset(marked_bytes_array, 0, (max_regions * sizeof(size_t)));
3502 task_card_bm->clear();
3503 }
3504 }
3505
3003 void ConcurrentMark::print_stats() { 3506 void ConcurrentMark::print_stats() {
3004 if (verbose_stats()) { 3507 if (verbose_stats()) {
3005 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3508 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3006 for (size_t i = 0; i < _active_tasks; ++i) { 3509 for (size_t i = 0; i < _active_tasks; ++i) {
3007 _tasks[i]->print_stats(); 3510 _tasks[i]->print_stats();
3333 3836
3334 // abandon current marking iteration due to a Full GC 3837 // abandon current marking iteration due to a Full GC
3335 void ConcurrentMark::abort() { 3838 void ConcurrentMark::abort() {
3336 // Clear all marks to force marking thread to do nothing 3839 // Clear all marks to force marking thread to do nothing
3337 _nextMarkBitMap->clearAll(); 3840 _nextMarkBitMap->clearAll();
3841 // Clear the liveness counting data
3842 clear_all_count_data();
3338 // Empty mark stack 3843 // Empty mark stack
3339 clear_marking_state(); 3844 clear_marking_state();
3340 for (int i = 0; i < (int)_max_task_num; ++i) { 3845 for (int i = 0; i < (int)_max_task_num; ++i) {
3341 _tasks[i]->clear_region_fields(); 3846 _tasks[i]->clear_region_fields();
3342 } 3847 }
3385 } 3890 }
3386 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3891 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3387 (_init_times.sum() + _remark_times.sum() + 3892 (_init_times.sum() + _remark_times.sum() +
3388 _cleanup_times.sum())/1000.0); 3893 _cleanup_times.sum())/1000.0);
3389 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3894 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3390 "(%8.2f s marking, %8.2f s counting).", 3895 "(%8.2f s marking).",
3391 cmThread()->vtime_accum(), 3896 cmThread()->vtime_accum(),
3392 cmThread()->vtime_mark_accum(), 3897 cmThread()->vtime_mark_accum());
3393 cmThread()->vtime_count_accum());
3394 } 3898 }
3395 3899
// Print the concurrent-marking worker threads to the given stream by
// delegating to the parallel worker gang.
3396 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3900
3397 _parallel_workers->print_worker_threads_on(st); 3901
3398 } 3902
4680 _claimed = false; 5184 _claimed = false;
4681 } 5185 }
4682 5186
4683 CMTask::CMTask(int task_id, 5187 CMTask::CMTask(int task_id,
4684 ConcurrentMark* cm, 5188 ConcurrentMark* cm,
5189 size_t* marked_bytes,
5190 BitMap* card_bm,
4685 CMTaskQueue* task_queue, 5191 CMTaskQueue* task_queue,
4686 CMTaskQueueSet* task_queues) 5192 CMTaskQueueSet* task_queues)
4687 : _g1h(G1CollectedHeap::heap()), 5193 : _g1h(G1CollectedHeap::heap()),
4688 _task_id(task_id), _cm(cm), 5194 _task_id(task_id), _cm(cm),
4689 _claimed(false), 5195 _claimed(false),
4690 _nextMarkBitMap(NULL), _hash_seed(17), 5196 _nextMarkBitMap(NULL), _hash_seed(17),
4691 _task_queue(task_queue), 5197 _task_queue(task_queue),
4692 _task_queues(task_queues), 5198 _task_queues(task_queues),
4693 _cm_oop_closure(NULL), 5199 _cm_oop_closure(NULL),
4694 _aborted_region(MemRegion()) { 5200 _aborted_region(MemRegion()),
5201 _marked_bytes_array(marked_bytes),
5202 _card_bm(card_bm) {
4695 guarantee(task_queue != NULL, "invariant"); 5203 guarantee(task_queue != NULL, "invariant");
4696 guarantee(task_queues != NULL, "invariant"); 5204 guarantee(task_queues != NULL, "invariant");
4697 5205
4698 statsOnly( _clock_due_to_scanning = 0; 5206 statsOnly( _clock_due_to_scanning = 0;
4699 _clock_due_to_marking = 0 ); 5207 _clock_due_to_marking = 0 );