comparison src/share/vm/gc_implementation/g1/concurrentMark.cpp @ 3771:842b840e67db

7046558: G1: concurrent marking optimizations
Summary: Some optimizations to improve the concurrent marking phase: specialize the main oop closure, make sure a few methods in the fast path are properly inlined, a few more bits and pieces, and some cosmetic fixes.
Reviewed-by: stefank, johnc
author tonyp
date Tue, 14 Jun 2011 10:33:43 -0400
parents 69293e516993
children 6747fd0512e0
comparison of 3770:74cd10898bea (old) with 3771:842b840e67db (new)
@@ old 22-35, new 22-36 @@
  *
  */
 
 #include "precompiled.hpp"
 #include "classfile/symbolTable.hpp"
-#include "gc_implementation/g1/concurrentMark.hpp"
+#include "gc_implementation/g1/concurrentMark.inline.hpp"
 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
+#include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "gc_implementation/shared/vmGCOperations.hpp"
 #include "memory/genOopClosures.inline.hpp"
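
Context for the include changes above: in HotSpot, method bodies that have to be inlined into hot paths conventionally live in a separate *.inline.hpp header that only .cpp files include, which is why concurrentMark.hpp becomes concurrentMark.inline.hpp here and g1OopClosures.inline.hpp is added. A minimal sketch of that convention, with made-up file and class names rather than the real G1 ones:

    // foo.hpp -- declaration only; cheap to include from other headers.
    class Foo {
      int _x;
    public:
      int x() const;            // body lives in foo.inline.hpp
    };

    // foo.inline.hpp -- included only by .cpp files that call x() on a
    // hot path, so the compiler sees the body and can inline the call.
    #include "foo.hpp"
    inline int Foo::x() const { return _x; }
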
@@ old 2544-2581, new 2545-2590 @@
 
   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
 };
 
 void ConcurrentMark::deal_with_reference(oop obj) {
-  if (verbose_high())
+  if (verbose_high()) {
     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
                            (void*) obj);
-
+  }
 
   HeapWord* objAddr = (HeapWord*) obj;
   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
   if (_g1h->is_in_g1_reserved(objAddr)) {
-    assert(obj != NULL, "is_in_g1_reserved should ensure this");
-    HeapRegion* hr = _g1h->heap_region_containing(obj);
-    if (_g1h->is_obj_ill(obj, hr)) {
-      if (verbose_high())
-        gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
-                               "marked", (void*) obj);
-
-      // we need to mark it first
-      if (_nextMarkBitMap->parMark(objAddr)) {
-        // No OrderAccess:store_load() is needed. It is implicit in the
-        // CAS done in parMark(objAddr) above
-        HeapWord* finger = _finger;
-        if (objAddr < finger) {
-          if (verbose_high())
-            gclog_or_tty->print_cr("[global] below the global finger "
-                                   "("PTR_FORMAT"), pushing it", finger);
-          if (!mark_stack_push(obj)) {
-            if (verbose_low())
-              gclog_or_tty->print_cr("[global] global stack overflow during "
-                                     "deal_with_reference");
+    assert(obj != NULL, "null check is implicit");
+    if (!_nextMarkBitMap->isMarked(objAddr)) {
+      // Only get the containing region if the object is not marked on the
+      // bitmap (otherwise, it's a waste of time since we won't do
+      // anything with it).
+      HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
+      if (!hr->obj_allocated_since_next_marking(obj)) {
+        if (verbose_high()) {
+          gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
+                                 "marked", (void*) obj);
+        }
+
+        // we need to mark it first
+        if (_nextMarkBitMap->parMark(objAddr)) {
+          // No OrderAccess:store_load() is needed. It is implicit in the
+          // CAS done in parMark(objAddr) above
+          HeapWord* finger = _finger;
+          if (objAddr < finger) {
+            if (verbose_high()) {
+              gclog_or_tty->print_cr("[global] below the global finger "
+                                     "("PTR_FORMAT"), pushing it", finger);
+            }
+            if (!mark_stack_push(obj)) {
+              if (verbose_low()) {
+                gclog_or_tty->print_cr("[global] global stack overflow during "
+                                       "deal_with_reference");
+              }
+            }
           }
         }
       }
     }
   }
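
Two things happen in the rewritten fast path above: the cheap next-marking-bitmap test now runs before the comparatively expensive region lookup, and parMark()'s CAS guarantees that only one thread greys a given object, which is why no extra OrderAccess::store_load() barrier is needed. A self-contained sketch of that CAS-based bitmap mark, using std::atomic instead of HotSpot's Atomic/CMBitMap classes (ToyMarkBitMap and its methods are illustrative names, not the real API):

    #include <atomic>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Toy parallel mark bitmap, one bit per object slot. par_mark() returns
    // true only for the thread whose CAS actually set the bit, mirroring the
    // role of CMBitMap::parMark(); losers see the bit already set and back off.
    class ToyMarkBitMap {
      std::vector<std::atomic<uint64_t>> _words;
    public:
      explicit ToyMarkBitMap(size_t bits) : _words((bits + 63) / 64) {}

      bool is_marked(size_t bit) const {
        return (_words[bit / 64].load(std::memory_order_relaxed) >> (bit % 64)) & 1;
      }

      bool par_mark(size_t bit) {
        std::atomic<uint64_t>& w = _words[bit / 64];
        uint64_t mask = uint64_t(1) << (bit % 64);
        uint64_t old = w.load(std::memory_order_relaxed);
        do {
          if (old & mask) return false;            // someone else marked it
        } while (!w.compare_exchange_weak(old, old | mask));
        return true;                               // this thread won the race
      }
    };

A thread that loses the race sees the bit already set and skips the push, which is exactly the property deal_with_reference() relies on.
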
@@ old 2629-2653, new 2638-2684 @@
   // _heap_end will not change underneath our feet; it only changes at
   // yield points.
   while (finger < _heap_end) {
     assert(_g1h->is_in_g1_reserved(finger), "invariant");
 
-    // is the gap between reading the finger and doing the CAS too long?
-
-    HeapRegion* curr_region = _g1h->heap_region_containing(finger);
+    // Note on how this code handles humongous regions. In the
+    // normal case the finger will reach the start of a "starts
+    // humongous" (SH) region. Its end will either be the end of the
+    // last "continues humongous" (CH) region in the sequence, or the
+    // standard end of the SH region (if the SH is the only region in
+    // the sequence). That way claim_region() will skip over the CH
+    // regions. However, there is a subtle race between a CM thread
+    // executing this method and a mutator thread doing a humongous
+    // object allocation. The two are not mutually exclusive as the CM
+    // thread does not need to hold the Heap_lock when it gets
+    // here. So there is a chance that claim_region() will come across
+    // a free region that's in the progress of becoming a SH or a CH
+    // region. In the former case, it will either
+    //   a) Miss the update to the region's end, in which case it will
+    //      visit every subsequent CH region, will find their bitmaps
+    //      empty, and do nothing, or
+    //   b) Will observe the update of the region's end (in which case
+    //      it will skip the subsequent CH regions).
+    // If it comes across a region that suddenly becomes CH, the
+    // scenario will be similar to b). So, the race between
+    // claim_region() and a humongous object allocation might force us
+    // to do a bit of unnecessary work (due to some unnecessary bitmap
+    // iterations) but it should not introduce and correctness issues.
+    HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
     HeapWord* bottom = curr_region->bottom();
     HeapWord* end = curr_region->end();
     HeapWord* limit = curr_region->next_top_at_mark_start();
 
-    if (verbose_low())
+    if (verbose_low()) {
       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
                              "["PTR_FORMAT", "PTR_FORMAT"), "
                              "limit = "PTR_FORMAT,
                              task_num, curr_region, bottom, end, limit);
+    }
 
-    HeapWord* res =
-      (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
+    // Is the gap between reading the finger and doing the CAS too long?
+    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
     if (res == finger) {
       // we succeeded
 
       // notice that _finger == end cannot be guaranteed here since,
       // someone else might have moved the finger even further
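
The CAS in this hunk is how claim_region() hands out work: a task reads the global finger, computes the end of the region the finger points into, and tries to swing the finger to that end; only the task whose cmpxchg succeeds owns the region. The humongous-region comment above explains why a concurrently changing region end merely causes some wasted bitmap iteration rather than a correctness problem. A stripped-down sketch of the claiming loop, with an index standing in for the HeapWord* finger and a fixed region size (all names here are illustrative, not HotSpot's):

    #include <atomic>
    #include <cstddef>

    const size_t REGION_WORDS = 512;   // stand-in for curr_region->end() - bottom

    struct ToyHeap {
      std::atomic<size_t> finger{0};
      size_t heap_end;

      explicit ToyHeap(size_t end) : heap_end(end) {}

      // Returns true and fills [start, end) if this thread claimed a region.
      bool claim_region(size_t& start, size_t& end) {
        size_t f = finger.load();
        while (f < heap_end) {
          size_t region_end = f + REGION_WORDS;
          // The gap between reading the finger and the CAS may be long; if
          // another worker moved it under us, retry with the value they set.
          if (finger.compare_exchange_strong(f, region_end)) {
            start = f;
            end = region_end;
            return true;
          }
        }
        return false;                  // the finger has reached the heap end
      }
    };
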
@@ old 3189-3198, new 3220-3245 @@
   }
   gclog_or_tty->print_cr("");
 }
 #endif
 
+void CMTask::scan_object(oop obj) {
+  assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
+
+  if (_cm->verbose_high()) {
+    gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
+                           _task_id, (void*) obj);
+  }
+
+  size_t obj_size = obj->size();
+  _words_scanned += obj_size;
+
+  obj->oop_iterate(_cm_oop_closure);
+  statsOnly( ++_objs_scanned );
+  check_limits();
+}
+
 // Closure for iteration over bitmaps
 class CMBitMapClosure : public BitMapClosure {
  private:
   // the bitmap that is being iterated over
   CMBitMap* _nextMarkBitMap;
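
The new CMTask::scan_object() shown above is the per-object half of the marking budget: it adds the object's size to _words_scanned and then calls check_limits(), which is how a marking task periodically gets a chance to yield. A rough sketch of that budgeting pattern, with an invented chunk size and simplified names (the real check_limits() also tracks _refs_reached and several statistics):

    #include <cstddef>

    // Count scanned work in words; once a slice's worth has been scanned,
    // take a "clock call" where the task can yield or abort, then extend
    // the limit for the next slice.
    class ToyScanBudget {
      size_t _words_scanned = 0;
      size_t _words_scanned_limit;
      static const size_t chunk = 12 * 1024;   // words per slice, invented

    public:
      ToyScanBudget() : _words_scanned_limit(chunk) {}

      void scanned(size_t obj_words) {
        _words_scanned += obj_words;
        if (_words_scanned >= _words_scanned_limit) {
          regular_clock_call();                // chance to yield or abort
          _words_scanned_limit = _words_scanned + chunk;
        }
      }

      void regular_clock_call() { /* check for a pending pause, update stats */ }
    };
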
@@ old 3252-3298, new 3299-3319 @@
   }
 
   CMObjectClosure(CMTask* task) : _task(task) { }
 };
 
-// Closure for iterating over object fields
-class CMOopClosure : public OopClosure {
-private:
-  G1CollectedHeap* _g1h;
-  ConcurrentMark*  _cm;
-  CMTask*          _task;
-
-public:
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-
-  template <class T> void do_oop_work(T* p) {
-    assert( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
-    assert(!_g1h->is_on_master_free_list(
-                    _g1h->heap_region_containing((HeapWord*) p)), "invariant");
-
-    oop obj = oopDesc::load_decode_heap_oop(p);
-    if (_cm->verbose_high())
-      gclog_or_tty->print_cr("[%d] we're looking at location "
-                             "*"PTR_FORMAT" = "PTR_FORMAT,
-                             _task->task_id(), p, (void*) obj);
-    _task->deal_with_reference(obj);
-  }
-
-  CMOopClosure(G1CollectedHeap* g1h,
-               ConcurrentMark* cm,
-               CMTask* task)
-    : _g1h(g1h), _cm(cm), _task(task)
-  {
-    assert(_ref_processor == NULL, "should be initialized to NULL");
-
-    if (G1UseConcMarkReferenceProcessing) {
-      _ref_processor = g1h->ref_processor();
-      assert(_ref_processor != NULL, "should not be NULL");
-    }
-  }
-};
+G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
+                               ConcurrentMark* cm,
+                               CMTask* task)
+  : _g1h(g1h), _cm(cm), _task(task) {
+  assert(_ref_processor == NULL, "should be initialized to NULL");
+
+  if (G1UseConcMarkReferenceProcessing) {
+    _ref_processor = g1h->ref_processor();
+    assert(_ref_processor != NULL, "should not be NULL");
+  }
+}
 
 void CMTask::setup_for_region(HeapRegion* hr) {
   // Separated the asserts so that we know which one fires.
   assert(hr != NULL,
          "claim_region() should have filtered out continues humongous regions");
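
Replacing the file-local CMOopClosure with the externally declared G1CMOopClosure is the "specialize the main oop closure" part of the change: once the closure has a named type that the oop iteration code knows about, obj->oop_iterate() can be dispatched to a variant whose do_oop calls are resolved at compile time instead of going through OopClosure's virtual interface (HotSpot generates such closure-specific oop_oop_iterate variants via macros). The toy contrast below only shows why that matters for inlining; it is not the HotSpot mechanism itself:

    #include <cstddef>

    struct OopClosure {                            // generic, virtual dispatch
      virtual void do_oop(void** p) = 0;
    };

    void iterate_virtual(void** fields, size_t n, OopClosure* cl) {
      for (size_t i = 0; i < n; i++) {
        cl->do_oop(&fields[i]);                    // indirect call per field
      }
    }

    template <class ClosureType>                   // specialized, inlinable
    void iterate_specialized(void** fields, size_t n, ClosureType* cl) {
      for (size_t i = 0; i < n; i++) {
        cl->do_oop(&fields[i]);                    // resolved at compile time
      }
    }
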
@@ old 3360-3369, new 3381-3399 @@
   _region_limit = NULL;
 
   _region_finger = NULL;
 }
 
+void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
+  if (cm_oop_closure == NULL) {
+    assert(_cm_oop_closure != NULL, "invariant");
+  } else {
+    assert(_cm_oop_closure == NULL, "invariant");
+  }
+  _cm_oop_closure = cm_oop_closure;
+}
+
 void CMTask::reset(CMBitMap* nextMarkBitMap) {
   guarantee(nextMarkBitMap != NULL, "invariant");
 
   if (_cm->verbose_low())
     gclog_or_tty->print_cr("[%d] resetting", _task_id);
@@ old 3407-3528, new 3437-3446 @@
   regular_clock_call();
   // This is called when we are in the termination protocol. We should
   // quit if, for some reason, this task wants to abort or the global
   // stack is not empty (this means that we can get work from it).
   return !_cm->mark_stack_empty() || has_aborted();
-}
-
-// This determines whether the method below will check both the local
-// and global fingers when determining whether to push on the stack a
-// gray object (value 1) or whether it will only check the global one
-// (value 0). The tradeoffs are that the former will be a bit more
-// accurate and possibly push less on the stack, but it might also be
-// a little bit slower.
-
-#define _CHECK_BOTH_FINGERS_ 1
-
-void CMTask::deal_with_reference(oop obj) {
-  if (_cm->verbose_high())
-    gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
-                           _task_id, (void*) obj);
-
-  ++_refs_reached;
-
-  HeapWord* objAddr = (HeapWord*) obj;
-  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
-  if (_g1h->is_in_g1_reserved(objAddr)) {
-    assert(obj != NULL, "is_in_g1_reserved should ensure this");
-    HeapRegion* hr = _g1h->heap_region_containing(obj);
-    if (_g1h->is_obj_ill(obj, hr)) {
-      if (_cm->verbose_high())
-        gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
-                               _task_id, (void*) obj);
-
-      // we need to mark it first
-      if (_nextMarkBitMap->parMark(objAddr)) {
-        // No OrderAccess:store_load() is needed. It is implicit in the
-        // CAS done in parMark(objAddr) above
-        HeapWord* global_finger = _cm->finger();
-
-#if _CHECK_BOTH_FINGERS_
-        // we will check both the local and global fingers
-
-        if (_finger != NULL && objAddr < _finger) {
-          if (_cm->verbose_high())
-            gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
-                                   "pushing it", _task_id, _finger);
-          push(obj);
-        } else if (_curr_region != NULL && objAddr < _region_limit) {
-          // do nothing
-        } else if (objAddr < global_finger) {
-          // Notice that the global finger might be moving forward
-          // concurrently. This is not a problem. In the worst case, we
-          // mark the object while it is above the global finger and, by
-          // the time we read the global finger, it has moved forward
-          // passed this object. In this case, the object will probably
-          // be visited when a task is scanning the region and will also
-          // be pushed on the stack. So, some duplicate work, but no
-          // correctness problems.
-
-          if (_cm->verbose_high())
-            gclog_or_tty->print_cr("[%d] below the global finger "
-                                   "("PTR_FORMAT"), pushing it",
-                                   _task_id, global_finger);
-          push(obj);
-        } else {
-          // do nothing
-        }
-#else // _CHECK_BOTH_FINGERS_
-        // we will only check the global finger
-
-        if (objAddr < global_finger) {
-          // see long comment above
-
-          if (_cm->verbose_high())
-            gclog_or_tty->print_cr("[%d] below the global finger "
-                                   "("PTR_FORMAT"), pushing it",
-                                   _task_id, global_finger);
-          push(obj);
-        }
-#endif // _CHECK_BOTH_FINGERS_
-      }
-    }
-  }
-}
-
-void CMTask::push(oop obj) {
-  HeapWord* objAddr = (HeapWord*) obj;
-  assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
-  assert(!_g1h->is_on_master_free_list(
-              _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
-  assert(!_g1h->is_obj_ill(obj), "invariant");
-  assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
-
-  if (_cm->verbose_high())
-    gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
-
-  if (!_task_queue->push(obj)) {
-    // The local task queue looks full. We need to push some entries
-    // to the global stack.
-
-    if (_cm->verbose_medium())
-      gclog_or_tty->print_cr("[%d] task queue overflow, "
-                             "moving entries to the global stack",
-                             _task_id);
-    move_entries_to_global_stack();
-
-    // this should succeed since, even if we overflow the global
-    // stack, we should have definitely removed some entries from the
-    // local queue. So, there must be space on it.
-    bool success = _task_queue->push(obj);
-    assert(success, "invariant");
-  }
-
-  statsOnly( int tmp_size = _task_queue->size();
-             if (tmp_size > _local_max_size)
-               _local_max_size = tmp_size;
-             ++_local_pushes );
 }
 
 void CMTask::reached_limit() {
   assert(_words_scanned >= _words_scanned_limit ||
          _refs_reached >= _refs_reached_limit ,
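
The deleted CMTask::deal_with_reference() and CMTask::push() are fast-path methods; per the commit summary they are being made properly inlinable, presumably by relocating them into the newly included concurrentMark.inline.hpp (that file is outside this diff). The push() logic they encode, a bounded per-task queue that spills a batch to a shared global mark stack on overflow, looks roughly like the sketch below; the container types, capacity, and batch size are simplified stand-ins, not HotSpot's lock-free task queue or its global mark stack:

    #include <cstddef>
    #include <deque>
    #include <mutex>
    #include <vector>

    struct ToyGlobalStack {
      std::mutex lock;
      std::vector<void*> entries;
      void push_batch(const std::vector<void*>& batch) {
        std::lock_guard<std::mutex> g(lock);
        entries.insert(entries.end(), batch.begin(), batch.end());
      }
    };

    struct ToyTask {
      static const size_t local_capacity = 1024;   // invented bound
      std::deque<void*> local;
      ToyGlobalStack* global;

      void push(void* obj) {
        if (local.size() == local_capacity) {
          // Local queue looks full: move a batch of entries to the global
          // stack so this push (and the next few) must succeed locally.
          std::vector<void*> batch;
          for (size_t i = 0; i < local_capacity / 3; i++) {
            batch.push_back(local.front());
            local.pop_front();
          }
          global->push_batch(batch);
        }
        local.push_back(obj);
      }
    };
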
@@ old 4156-4167, new 4074-4085 @@
 
   // Set up the bitmap and oop closures. Anything that uses them is
   // eventually called from this method, so it is OK to allocate these
   // statically.
   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
-  CMOopClosure oop_closure(_g1h, _cm, this);
-  set_oop_closure(&oop_closure);
+  G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
+  set_cm_oop_closure(&cm_oop_closure);
 
   if (_cm->has_overflown()) {
     // This can happen if the region stack or the mark stack overflows
     // during a GC pause and this task, after a yield point,
     // restarts. We have to abort as we need to get into the overflow
@@ old 4433-4443, new 4351-4361 @@
   }
 
   // Mainly for debugging purposes to make sure that a pointer to the
   // closure which was statically allocated in this frame doesn't
   // escape it by accident.
-  set_oop_closure(NULL);
+  set_cm_oop_closure(NULL);
   double end_time_ms = os::elapsedVTime() * 1000.0;
   double elapsed_time_ms = end_time_ms - _start_time_ms;
   // Update the step history.
   _step_times_ms.add(elapsed_time_ms);
 
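
The set_cm_oop_closure(&cm_oop_closure) and set_cm_oop_closure(NULL) calls in the two hunks above bracket the lifetime of a closure that lives on the frame of the method doing the marking step, and set_cm_oop_closure() asserts that the field toggles strictly between NULL and non-NULL. Expressed as RAII, the same "don't let the frame-local closure escape" discipline could look like the guard below; this is purely illustrative, HotSpot keeps the explicit calls shown in the diff:

    // Scoped setter: installs the frame-local closure on construction and
    // clears it on destruction, so the pointer cannot outlive the frame.
    template <class Task, class Closure>
    class ScopedCMOopClosure {
      Task* _task;
    public:
      ScopedCMOopClosure(Task* task, Closure* cl) : _task(task) {
        _task->set_cm_oop_closure(cl);       // asserts it was NULL before
      }
      ~ScopedCMOopClosure() {
        _task->set_cm_oop_closure(nullptr);  // asserts it was set; clears it
      }
    };
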
@@ old 4508-4518, new 4426-4436 @@
     _task_id(task_id), _cm(cm),
     _claimed(false),
     _nextMarkBitMap(NULL), _hash_seed(17),
     _task_queue(task_queue),
     _task_queues(task_queues),
-    _oop_closure(NULL),
+    _cm_oop_closure(NULL),
     _aborted_region(MemRegion()) {
   guarantee(task_queue != NULL, "invariant");
   guarantee(task_queues != NULL, "invariant");
 
   statsOnly( _clock_due_to_scanning = 0;