# HG changeset patch # User iveresov # Date 1265932339 28800 # Node ID 0414c1049f159aa7bfff4737a9fd6d47949980ab # Parent 8859772195c68e7a6182df2f4fc6448b6734fb10 6923991: G1: improve scalability of RSet scanning Summary: Implemented block-based work stealing. Moved copying during the rset scanning phase to the main copying phase. Made the size of rset table depend on the region size. Reviewed-by: apetrusenko, tonyp diff -r 8859772195c6 -r 0414c1049f15 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Tue Feb 09 13:56:09 2010 -0800 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Feb 11 15:52:19 2010 -0800 @@ -2646,6 +2646,13 @@ // +struct PrepareForRSScanningClosure : public HeapRegionClosure { + bool doHeapRegion(HeapRegion *r) { + r->rem_set()->set_iter_claimed(0); + return false; + } +}; + void G1CollectedHeap::do_collection_pause_at_safepoint() { if (PrintHeapAtGC) { @@ -2784,6 +2791,8 @@ gclog_or_tty->print_cr("\nAfter pause, heap:"); print(); #endif + PrepareForRSScanningClosure prepare_for_rs_scan; + collection_set_iterate(&prepare_for_rs_scan); setup_surviving_young_words(); @@ -3781,22 +3790,16 @@ return obj; } -template +template template -void G1ParCopyClosure +void G1ParCopyClosure ::do_oop_work(T* p) { oop obj = oopDesc::load_decode_heap_oop(p); assert(barrier != G1BarrierRS || obj != NULL, "Precondition: G1BarrierRS implies obj is nonNull"); - // The only time we skip the cset test is when we're scanning - // references popped from the queue. And we only push on the queue - // references that we know point into the cset, so no point in - // checking again. But we'll leave an assert here for peace of mind. - assert(!skip_cset_test || _g1->obj_in_cs(obj), "invariant"); - // here the null check is implicit in the cset_fast_test() test - if (skip_cset_test || _g1->in_cset_fast_test(obj)) { + if (_g1->in_cset_fast_test(obj)) { #if G1_REM_SET_LOGGING gclog_or_tty->print_cr("Loc "PTR_FORMAT" contains pointer "PTR_FORMAT" " "into CS.", p, (void*) obj); @@ -3813,7 +3816,6 @@ } } - // When scanning moved objs, must look at all oops. if (barrier == G1BarrierEvac && obj != NULL) { _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num()); } @@ -3823,8 +3825,8 @@ } } -template void G1ParCopyClosure::do_oop_work(oop* p); -template void G1ParCopyClosure::do_oop_work(narrowOop* p); +template void G1ParCopyClosure::do_oop_work(oop* p); +template void G1ParCopyClosure::do_oop_work(narrowOop* p); template void G1ParScanPartialArrayClosure::do_oop_nv(T* p) { assert(has_partial_array_mask(p), "invariant"); @@ -3896,11 +3898,11 @@ assert(UseCompressedOops, "Error"); narrowOop* p = (narrowOop*) stolen_task; assert(has_partial_array_mask(p) || - _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "Error"); + _g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(p)), "Error"); pss->push_on_queue(p); } else { oop* p = (oop*) stolen_task; - assert(has_partial_array_mask(p) || _g1h->obj_in_cs(*p), "Error"); + assert(has_partial_array_mask(p) || _g1h->is_in_g1_reserved(*p), "Error"); pss->push_on_queue(p); } continue; @@ -3962,6 +3964,7 @@ G1ParScanExtRootClosure only_scan_root_cl(_g1h, &pss); G1ParScanPermClosure only_scan_perm_cl(_g1h, &pss); G1ParScanHeapRSClosure only_scan_heap_rs_cl(_g1h, &pss); + G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss); G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss); G1ParScanAndMarkPermClosure scan_mark_perm_cl(_g1h, &pss); @@ -3985,7 +3988,7 @@ _g1h->g1_process_strong_roots(/* not collecting perm */ false, SharedHeap::SO_AllClasses, scan_root_cl, - &only_scan_heap_rs_cl, + &push_heap_rs_cl, scan_so_cl, scan_perm_cl, i); diff -r 8859772195c6 -r 0414c1049f15 src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Tue Feb 09 13:56:09 2010 -0800 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Thu Feb 11 15:52:19 2010 -0800 @@ -1623,7 +1623,7 @@ template void push_on_queue(T* ref) { assert(ref != NULL, "invariant"); assert(has_partial_array_mask(ref) || - _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(ref)), "invariant"); + _g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(ref)), "invariant"); #ifdef ASSERT if (has_partial_array_mask(ref)) { oop p = clear_partial_array_mask(ref); @@ -1644,9 +1644,9 @@ assert((oop*)ref != NULL, "pop_local() returned true"); assert(UseCompressedOops || !ref.is_narrow(), "Error"); assert(has_partial_array_mask((oop*)ref) || - _g1h->obj_in_cs(ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)ref) - : oopDesc::load_decode_heap_oop((oop*)ref)), - "invariant"); + _g1h->is_in_g1_reserved(ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)ref) + : oopDesc::load_decode_heap_oop((oop*)ref)), + "invariant"); IF_G1_DETAILED_STATS(note_pop()); } else { StarTask null_task; @@ -1659,9 +1659,9 @@ assert((oop*)new_ref != NULL, "pop() from a local non-empty stack"); assert(UseCompressedOops || !new_ref.is_narrow(), "Error"); assert(has_partial_array_mask((oop*)new_ref) || - _g1h->obj_in_cs(new_ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)new_ref) - : oopDesc::load_decode_heap_oop((oop*)new_ref)), - "invariant"); + _g1h->is_in_g1_reserved(new_ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)new_ref) + : oopDesc::load_decode_heap_oop((oop*)new_ref)), + "invariant"); ref = new_ref; } @@ -1825,12 +1825,12 @@ assert(UseCompressedOops, "Error"); narrowOop* p = (narrowOop*)ref_to_scan; assert(!has_partial_array_mask(p) && - _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity"); + _g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(p)), "sanity"); deal_with_reference(p); } else { oop* p = (oop*)ref_to_scan; - assert((has_partial_array_mask(p) && _g1h->obj_in_cs(clear_partial_array_mask(p))) || - _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity"); + assert((has_partial_array_mask(p) && _g1h->is_in_g1_reserved(clear_partial_array_mask(p))) || + _g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(p)), "sanity"); deal_with_reference(p); } } @@ -1844,12 +1844,12 @@ assert(UseCompressedOops, "Error"); narrowOop* p = (narrowOop*)ref_to_scan; assert(!has_partial_array_mask(p) && - _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity"); + _g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(p)), "sanity"); deal_with_reference(p); } else { oop* p = (oop*)ref_to_scan; assert((has_partial_array_mask(p) && _g1h->obj_in_cs(clear_partial_array_mask(p))) || - _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity"); + _g1h->is_in_g1_reserved(oopDesc::load_decode_heap_oop(p)), "sanity"); deal_with_reference(p); } } diff -r 8859772195c6 -r 0414c1049f15 src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Tue Feb 09 13:56:09 2010 -0800 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Thu Feb 11 15:52:19 2010 -0800 @@ -205,6 +205,7 @@ // policy is created before the heap, we have to set this up here, // so it's done as soon as possible. HeapRegion::setup_heap_region_size(Arguments::min_heap_size()); + HeapRegionRemSet::setup_remset_size(); _recent_prev_end_times_for_all_gcs_sec->add(os::elapsedTime()); _prev_collection_pause_end_ms = os::elapsedTime() * 1000.0; diff -r 8859772195c6 -r 0414c1049f15 src/share/vm/gc_implementation/g1/g1OopClosures.hpp --- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp Tue Feb 09 13:56:09 2010 -0800 +++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp Thu Feb 11 15:52:19 2010 -0800 @@ -53,6 +53,15 @@ bool apply_to_weak_ref_discovered_field() { return true; } }; +class G1ParPushHeapRSClosure : public G1ParClosureSuper { +public: + G1ParPushHeapRSClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : + G1ParClosureSuper(g1, par_scan_state) { } + template void do_oop_nv(T* p); + virtual void do_oop(oop* p) { do_oop_nv(p); } + virtual void do_oop(narrowOop* p) { do_oop_nv(p); } +}; + class G1ParScanClosure : public G1ParClosureSuper { public: G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : @@ -100,7 +109,7 @@ }; template + bool do_mark_forwardee> class G1ParCopyClosure : public G1ParCopyHelper { G1ParScanClosure _scanner; template void do_oop_work(T* p); @@ -116,12 +125,13 @@ virtual void do_oop(narrowOop* p) { do_oop_nv(p); } }; -typedef G1ParCopyClosure G1ParScanExtRootClosure; -typedef G1ParCopyClosure G1ParScanPermClosure; -typedef G1ParCopyClosure G1ParScanHeapRSClosure; -typedef G1ParCopyClosure G1ParScanAndMarkExtRootClosure; -typedef G1ParCopyClosure G1ParScanAndMarkPermClosure; -typedef G1ParCopyClosure G1ParScanAndMarkHeapRSClosure; +typedef G1ParCopyClosure G1ParScanExtRootClosure; +typedef G1ParCopyClosure G1ParScanPermClosure; +typedef G1ParCopyClosure G1ParScanHeapRSClosure; +typedef G1ParCopyClosure G1ParScanAndMarkExtRootClosure; +typedef G1ParCopyClosure G1ParScanAndMarkPermClosure; +typedef G1ParCopyClosure G1ParScanAndMarkHeapRSClosure; + // This is the only case when we set skip_cset_test. Basically, this // closure is (should?) only be called directly while we're draining // the overflow and task queues. In that case we know that the @@ -132,7 +142,7 @@ // We need a separate closure to handle references during evacuation // failure processing, as we cannot asume that the reference already // points into the collection set (like G1ParScanHeapEvacClosure does). -typedef G1ParCopyClosure G1ParScanHeapEvacFailureClosure; +typedef G1ParCopyClosure G1ParScanHeapEvacFailureClosure; class FilterIntoCSClosure: public OopClosure { G1CollectedHeap* _g1; diff -r 8859772195c6 -r 0414c1049f15 src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp --- a/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp Tue Feb 09 13:56:09 2010 -0800 +++ b/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp Thu Feb 11 15:52:19 2010 -0800 @@ -104,3 +104,16 @@ } } } + +template inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) { + T heap_oop = oopDesc::load_heap_oop(p); + + if (!oopDesc::is_null(heap_oop)) { + oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); + if (_g1->in_cset_fast_test(obj)) { + Prefetch::write(obj->mark_addr(), 0); + Prefetch::read(obj->mark_addr(), (HeapWordSize*2)); + _par_scan_state->push_on_queue(p); + } + } +} diff -r 8859772195c6 -r 0414c1049f15 src/share/vm/gc_implementation/g1/g1RemSet.cpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp Tue Feb 09 13:56:09 2010 -0800 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Thu Feb 11 15:52:19 2010 -0800 @@ -155,8 +155,8 @@ G1BlockOffsetSharedArray* _bot_shared; CardTableModRefBS *_ct_bs; int _worker_i; + int _block_size; bool _try_claimed; - size_t _min_skip_distance, _max_skip_distance; public: ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) : _oc(oc), @@ -168,8 +168,7 @@ _g1h = G1CollectedHeap::heap(); _bot_shared = _g1h->bot_shared(); _ct_bs = (CardTableModRefBS*) (_g1h->barrier_set()); - _min_skip_distance = 16; - _max_skip_distance = 2 * _g1h->n_par_threads() * _min_skip_distance; + _block_size = MAX2(G1RSetScanBlockSize, 1); } void set_try_claimed() { _try_claimed = true; } @@ -225,12 +224,15 @@ HeapRegionRemSetIterator* iter = _g1h->rem_set_iterator(_worker_i); hrrs->init_iterator(iter); size_t card_index; - size_t skip_distance = 0, current_card = 0, jump_to_card = 0; - while (iter->has_next(card_index)) { - if (current_card < jump_to_card) { - ++current_card; - continue; + + // We claim cards in block so as to recude the contention. The block size is determined by + // the G1RSetScanBlockSize parameter. + size_t jump_to_card = hrrs->iter_claimed_next(_block_size); + for (size_t current_card = 0; iter->has_next(card_index); current_card++) { + if (current_card >= jump_to_card + _block_size) { + jump_to_card = hrrs->iter_claimed_next(_block_size); } + if (current_card < jump_to_card) continue; HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index); #if 0 gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n", @@ -247,22 +249,14 @@ // If the card is dirty, then we will scan it during updateRS. if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) { - if (!_ct_bs->is_card_claimed(card_index) && _ct_bs->claim_card(card_index)) { - scanCard(card_index, card_region); - } else if (_try_claimed) { - if (jump_to_card == 0 || jump_to_card != current_card) { - // We did some useful work in the previous iteration. - // Decrease the distance. - skip_distance = MAX2(skip_distance >> 1, _min_skip_distance); - } else { - // Previous iteration resulted in a claim failure. - // Increase the distance. - skip_distance = MIN2(skip_distance << 1, _max_skip_distance); - } - jump_to_card = current_card + skip_distance; - } + // We make the card as "claimed" lazily (so races are possible but they're benign), + // which reduces the number of duplicate scans (the rsets of the regions in the cset + // can intersect). + if (!_ct_bs->is_card_claimed(card_index)) { + _ct_bs->set_card_claimed(card_index); + scanCard(card_index, card_region); + } } - ++current_card; } if (!_try_claimed) { hrrs->set_iter_complete(); @@ -299,30 +293,18 @@ double rs_time_start = os::elapsedTime(); HeapRegion *startRegion = calculateStartRegion(worker_i); - BufferingOopsInHeapRegionClosure boc(oc); - ScanRSClosure scanRScl(&boc, worker_i); + ScanRSClosure scanRScl(oc, worker_i); _g1->collection_set_iterate_from(startRegion, &scanRScl); scanRScl.set_try_claimed(); _g1->collection_set_iterate_from(startRegion, &scanRScl); - boc.done(); - double closure_app_time_sec = boc.closure_app_seconds(); - double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) - - closure_app_time_sec; - double closure_app_time_ms = closure_app_time_sec * 1000.0; + double scan_rs_time_sec = os::elapsedTime() - rs_time_start; assert( _cards_scanned != NULL, "invariant" ); _cards_scanned[worker_i] = scanRScl.cards_done(); _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0); _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0); - - double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i); - if (scan_new_refs_time_ms > 0.0) { - closure_app_time_ms += scan_new_refs_time_ms; - } - - _g1p->record_obj_copy_time(worker_i, closure_app_time_ms); } void HRInto_G1RemSet::updateRS(int worker_i) { @@ -449,9 +431,8 @@ oc->do_oop(p); } } - _g1p->record_scan_new_refs_time(worker_i, - (os::elapsedTime() - scan_new_refs_start_sec) - * 1000.0); + double scan_new_refs_time_ms = (os::elapsedTime() - scan_new_refs_start_sec) * 1000.0; + _g1p->record_scan_new_refs_time(worker_i, scan_new_refs_time_ms); } void HRInto_G1RemSet::cleanupHRRS() { diff -r 8859772195c6 -r 0414c1049f15 src/share/vm/gc_implementation/g1/g1_globals.hpp --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Tue Feb 09 13:56:09 2010 -0800 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Thu Feb 11 15:52:19 2010 -0800 @@ -207,8 +207,20 @@ develop(bool, G1PrintOopAppls, false, \ "When true, print applications of closures to external locs.") \ \ - develop(intx, G1LogRSRegionEntries, 7, \ - "Log_2 of max number of regions for which we keep bitmaps.") \ + develop(intx, G1RSetRegionEntriesBase, 256, \ + "Max number of regions in a fine-grain table per MB.") \ + \ + product(intx, G1RSetRegionEntries, 0, \ + "Max number of regions for which we keep bitmaps." \ + "Will be set ergonomically by default") \ + \ + develop(intx, G1RSetSparseRegionEntriesBase, 4, \ + "Max number of entries per region in a sparse table " \ + "per MB.") \ + \ + product(intx, G1RSetSparseRegionEntries, 0, \ + "Max number of entries per region in a sparse table." \ + "Will be set ergonomically by default.") \ \ develop(bool, G1RecordHRRSOops, false, \ "When true, record recent calls to rem set operations.") \ @@ -293,6 +305,10 @@ develop(bool, G1VerifyCTCleanup, false, \ "Verify card table cleanup.") \ \ + product(uintx, G1RSetScanBlockSize, 64, \ + "Size of a work unit of cards claimed by a worker thread" \ + "during RSet scanning.") \ + \ develop(bool, ReduceInitialCardMarksForG1, false, \ "When ReduceInitialCardMarks is true, this flag setting " \ " controls whether G1 allows the RICM optimization") diff -r 8859772195c6 -r 0414c1049f15 src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp --- a/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp Tue Feb 09 13:56:09 2010 -0800 +++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp Thu Feb 11 15:52:19 2010 -0800 @@ -33,11 +33,12 @@ }; template + bool do_mark_forwardee> class G1ParCopyClosure; class G1ParScanClosure; +class G1ParPushHeapRSClosure; -typedef G1ParCopyClosure G1ParScanHeapEvacClosure; +typedef G1ParCopyClosure G1ParScanHeapEvacClosure; class FilterIntoCSClosure; class FilterOutOfRegionClosure; @@ -51,6 +52,7 @@ #define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \ f(G1ParScanHeapEvacClosure,_nv) \ f(G1ParScanClosure,_nv) \ + f(G1ParPushHeapRSClosure,_nv) \ f(FilterIntoCSClosure,_nv) \ f(FilterOutOfRegionClosure,_nv) \ f(FilterInHeapRegionAndIntoCSClosure,_nv) \ diff -r 8859772195c6 -r 0414c1049f15 src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Tue Feb 09 13:56:09 2010 -0800 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Thu Feb 11 15:52:19 2010 -0800 @@ -505,12 +505,13 @@ typedef PosParPRT* PosParPRTPtr; if (_max_fine_entries == 0) { assert(_mod_max_fine_entries_mask == 0, "Both or none."); - _max_fine_entries = (size_t)(1 << G1LogRSRegionEntries); + size_t max_entries_log = (size_t)log2_long((jlong)G1RSetRegionEntries); + _max_fine_entries = (size_t)(1 << max_entries_log); _mod_max_fine_entries_mask = _max_fine_entries - 1; #if SAMPLE_FOR_EVICTION assert(_fine_eviction_sample_size == 0 && _fine_eviction_stride == 0, "All init at same time."); - _fine_eviction_sample_size = MAX2((size_t)4, (size_t)G1LogRSRegionEntries); + _fine_eviction_sample_size = MAX2((size_t)4, max_entries_log); _fine_eviction_stride = _max_fine_entries / _fine_eviction_sample_size; #endif } @@ -655,13 +656,6 @@ #endif } - // Otherwise, transfer from sparse to fine-grain. - CardIdx_t cards[SparsePRTEntry::CardsPerEntry]; - if (G1HRRSUseSparseTable) { - bool res = _sparse_table.get_cards(from_hrs_ind, &cards[0]); - assert(res, "There should have been an entry"); - } - if (_n_fine_entries == _max_fine_entries) { prt = delete_region_table(); } else { @@ -676,10 +670,12 @@ _fine_grain_regions[ind] = prt; _n_fine_entries++; - // Add in the cards from the sparse table. if (G1HRRSUseSparseTable) { - for (int i = 0; i < SparsePRTEntry::CardsPerEntry; i++) { - CardIdx_t c = cards[i]; + // Transfer from sparse to fine-grain. + SparsePRTEntry *sprt_entry = _sparse_table.get_entry(from_hrs_ind); + assert(sprt_entry != NULL, "There should have been an entry"); + for (int i = 0; i < SparsePRTEntry::cards_num(); i++) { + CardIdx_t c = sprt_entry->card(i); if (c != SparsePRTEntry::NullEntry) { prt->add_card(c); } @@ -1084,6 +1080,19 @@ {} +void HeapRegionRemSet::setup_remset_size() { + // Setup sparse and fine-grain tables sizes. + // table_size = base * (log(region_size / 1M) + 1) + int region_size_log_mb = MAX2((int)HeapRegion::LogOfHRGrainBytes - (int)LOG_M, 0); + if (FLAG_IS_DEFAULT(G1RSetSparseRegionEntries)) { + G1RSetSparseRegionEntries = G1RSetSparseRegionEntriesBase * (region_size_log_mb + 1); + } + if (FLAG_IS_DEFAULT(G1RSetRegionEntries)) { + G1RSetRegionEntries = G1RSetRegionEntriesBase * (region_size_log_mb + 1); + } + guarantee(G1RSetSparseRegionEntries > 0 && G1RSetRegionEntries > 0 , "Sanity"); +} + void HeapRegionRemSet::init_for_par_iteration() { _iter_state = Unclaimed; } @@ -1399,7 +1408,7 @@ os::sleep(Thread::current(), (jlong)5000, false); G1CollectedHeap* g1h = G1CollectedHeap::heap(); - // Run with "-XX:G1LogRSRegionEntries=2", so that 1 and 5 end up in same + // Run with "-XX:G1LogRSetRegionEntries=2", so that 1 and 5 end up in same // hash bucket. HeapRegion* hr0 = g1h->region_at(0); HeapRegion* hr1 = g1h->region_at(1); diff -r 8859772195c6 -r 0414c1049f15 src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Tue Feb 09 13:56:09 2010 -0800 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Thu Feb 11 15:52:19 2010 -0800 @@ -187,7 +187,8 @@ void clear_outgoing_entries(); enum ParIterState { Unclaimed, Claimed, Complete }; - ParIterState _iter_state; + volatile ParIterState _iter_state; + volatile jlong _iter_claimed; // Unused unless G1RecordHRRSOops is true. @@ -209,6 +210,7 @@ HeapRegion* hr); static int num_par_rem_sets(); + static void setup_remset_size(); HeapRegion* hr() const { return _other_regions.hr(); @@ -272,6 +274,19 @@ // Returns "true" iff the region's iteration is complete. bool iter_is_complete(); + // Support for claiming blocks of cards during iteration + void set_iter_claimed(size_t x) { _iter_claimed = (jlong)x; } + size_t iter_claimed() const { return (size_t)_iter_claimed; } + // Claim the next block of cards + size_t iter_claimed_next(size_t step) { + size_t current, next; + do { + current = iter_claimed(); + next = current + step; + } while (Atomic::cmpxchg((jlong)next, &_iter_claimed, (jlong)current) != (jlong)current); + return current; + } + // Initialize the given iterator to iterate over this rem set. void init_iterator(HeapRegionRemSetIterator* iter) const; diff -r 8859772195c6 -r 0414c1049f15 src/share/vm/gc_implementation/g1/sparsePRT.cpp --- a/src/share/vm/gc_implementation/g1/sparsePRT.cpp Tue Feb 09 13:56:09 2010 -0800 +++ b/src/share/vm/gc_implementation/g1/sparsePRT.cpp Thu Feb 11 15:52:19 2010 -0800 @@ -27,7 +27,7 @@ #define SPARSE_PRT_VERBOSE 0 -#define UNROLL_CARD_LOOPS 1 +#define UNROLL_CARD_LOOPS 1 void SparsePRT::init_iterator(SparsePRTIter* sprt_iter) { sprt_iter->init(this); @@ -36,27 +36,32 @@ void SparsePRTEntry::init(RegionIdx_t region_ind) { _region_ind = region_ind; _next_index = NullEntry; + #if UNROLL_CARD_LOOPS - assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll."); - _cards[0] = NullEntry; - _cards[1] = NullEntry; - _cards[2] = NullEntry; - _cards[3] = NullEntry; + assert((cards_num() & (UnrollFactor - 1)) == 0, "Invalid number of cards in the entry"); + for (int i = 0; i < cards_num(); i += UnrollFactor) { + _cards[i] = NullEntry; + _cards[i + 1] = NullEntry; + _cards[i + 2] = NullEntry; + _cards[i + 3] = NullEntry; + } #else - for (int i = 0; i < CardsPerEntry; i++) + for (int i = 0; i < cards_num(); i++) _cards[i] = NullEntry; #endif } bool SparsePRTEntry::contains_card(CardIdx_t card_index) const { #if UNROLL_CARD_LOOPS - assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll."); - if (_cards[0] == card_index) return true; - if (_cards[1] == card_index) return true; - if (_cards[2] == card_index) return true; - if (_cards[3] == card_index) return true; + assert((cards_num() & (UnrollFactor - 1)) == 0, "Invalid number of cards in the entry"); + for (int i = 0; i < cards_num(); i += UnrollFactor) { + if (_cards[i] == card_index || + _cards[i + 1] == card_index || + _cards[i + 2] == card_index || + _cards[i + 3] == card_index) return true; + } #else - for (int i = 0; i < CardsPerEntry; i++) { + for (int i = 0; i < cards_num(); i++) { if (_cards[i] == card_index) return true; } #endif @@ -67,14 +72,16 @@ int SparsePRTEntry::num_valid_cards() const { int sum = 0; #if UNROLL_CARD_LOOPS - assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll."); - if (_cards[0] != NullEntry) sum++; - if (_cards[1] != NullEntry) sum++; - if (_cards[2] != NullEntry) sum++; - if (_cards[3] != NullEntry) sum++; + assert((cards_num() & (UnrollFactor - 1)) == 0, "Invalid number of cards in the entry"); + for (int i = 0; i < cards_num(); i += UnrollFactor) { + sum += (_cards[i] != NullEntry); + sum += (_cards[i + 1] != NullEntry); + sum += (_cards[i + 2] != NullEntry); + sum += (_cards[i + 3] != NullEntry); + } #else - for (int i = 0; i < CardsPerEntry; i++) { - if (_cards[i] != NulLEntry) sum++; + for (int i = 0; i < cards_num(); i++) { + sum += (_cards[i] != NullEntry); } #endif // Otherwise, we're full. @@ -83,27 +90,27 @@ SparsePRTEntry::AddCardResult SparsePRTEntry::add_card(CardIdx_t card_index) { #if UNROLL_CARD_LOOPS - assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll."); - CardIdx_t c = _cards[0]; - if (c == card_index) return found; - if (c == NullEntry) { _cards[0] = card_index; return added; } - c = _cards[1]; - if (c == card_index) return found; - if (c == NullEntry) { _cards[1] = card_index; return added; } - c = _cards[2]; - if (c == card_index) return found; - if (c == NullEntry) { _cards[2] = card_index; return added; } - c = _cards[3]; - if (c == card_index) return found; - if (c == NullEntry) { _cards[3] = card_index; return added; } + assert((cards_num() & (UnrollFactor - 1)) == 0, "Invalid number of cards in the entry"); + CardIdx_t c; + for (int i = 0; i < cards_num(); i += UnrollFactor) { + c = _cards[i]; + if (c == card_index) return found; + if (c == NullEntry) { _cards[i] = card_index; return added; } + c = _cards[i + 1]; + if (c == card_index) return found; + if (c == NullEntry) { _cards[i + 1] = card_index; return added; } + c = _cards[i + 2]; + if (c == card_index) return found; + if (c == NullEntry) { _cards[i + 2] = card_index; return added; } + c = _cards[i + 3]; + if (c == card_index) return found; + if (c == NullEntry) { _cards[i + 3] = card_index; return added; } + } #else - for (int i = 0; i < CardsPerEntry; i++) { + for (int i = 0; i < cards_num(); i++) { CardIdx_t c = _cards[i]; if (c == card_index) return found; - if (c == NullEntry) { - _cards[i] = card_index; - return added; - } + if (c == NullEntry) { _cards[i] = card_index; return added; } } #endif // Otherwise, we're full. @@ -112,13 +119,15 @@ void SparsePRTEntry::copy_cards(CardIdx_t* cards) const { #if UNROLL_CARD_LOOPS - assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll."); - cards[0] = _cards[0]; - cards[1] = _cards[1]; - cards[2] = _cards[2]; - cards[3] = _cards[3]; + assert((cards_num() & (UnrollFactor - 1)) == 0, "Invalid number of cards in the entry"); + for (int i = 0; i < cards_num(); i += UnrollFactor) { + cards[i] = _cards[i]; + cards[i + 1] = _cards[i + 1]; + cards[i + 2] = _cards[i + 2]; + cards[i + 3] = _cards[i + 3]; + } #else - for (int i = 0; i < CardsPerEntry; i++) { + for (int i = 0; i < cards_num(); i++) { cards[i] = _cards[i]; } #endif @@ -133,7 +142,7 @@ RSHashTable::RSHashTable(size_t capacity) : _capacity(capacity), _capacity_mask(capacity-1), _occupied_entries(0), _occupied_cards(0), - _entries(NEW_C_HEAP_ARRAY(SparsePRTEntry, capacity)), + _entries((SparsePRTEntry*)NEW_C_HEAP_ARRAY(char, SparsePRTEntry::size() * capacity)), _buckets(NEW_C_HEAP_ARRAY(int, capacity)), _free_list(NullEntry), _free_region(0) { @@ -161,8 +170,8 @@ "_capacity too large"); // This will put -1 == NullEntry in the key field of all entries. - memset(_entries, -1, _capacity * sizeof(SparsePRTEntry)); - memset(_buckets, -1, _capacity * sizeof(int)); + memset(_entries, NullEntry, _capacity * SparsePRTEntry::size()); + memset(_buckets, NullEntry, _capacity * sizeof(int)); _free_list = NullEntry; _free_region = 0; } @@ -175,8 +184,8 @@ if (res == SparsePRTEntry::added) _occupied_cards++; #if SPARSE_PRT_VERBOSE gclog_or_tty->print_cr(" after add_card[%d]: valid-cards = %d.", - pointer_delta(e, _entries, sizeof(SparsePRTEntry)), - e->num_valid_cards()); + pointer_delta(e, _entries, SparsePRTEntry::size()), + e->num_valid_cards()); #endif assert(e->num_valid_cards() > 0, "Postcondition"); return res != SparsePRTEntry::overflow; @@ -199,6 +208,22 @@ return true; } +SparsePRTEntry* RSHashTable::get_entry(RegionIdx_t region_ind) { + int ind = (int) (region_ind & capacity_mask()); + int cur_ind = _buckets[ind]; + SparsePRTEntry* cur; + while (cur_ind != NullEntry && + (cur = entry(cur_ind))->r_ind() != region_ind) { + cur_ind = cur->next_index(); + } + + if (cur_ind == NullEntry) return NULL; + // Otherwise... + assert(cur->r_ind() == region_ind, "Postcondition of loop + test above."); + assert(cur->num_valid_cards() > 0, "Inv"); + return cur; +} + bool RSHashTable::delete_entry(RegionIdx_t region_ind) { int ind = (int) (region_ind & capacity_mask()); int* prev_loc = &_buckets[ind]; @@ -225,20 +250,8 @@ int ind = (int) (region_ind & capacity_mask()); int cur_ind = _buckets[ind]; SparsePRTEntry* cur; - // XXX - // int k = 0; while (cur_ind != NullEntry && (cur = entry(cur_ind))->r_ind() != region_ind) { - /* - k++; - if (k > 10) { - gclog_or_tty->print_cr("RSHashTable::entry_for_region_ind(%d): " - "k = %d, cur_ind = %d.", region_ind, k, cur_ind); - if (k >= 1000) { - while (1) ; - } - } - */ cur_ind = cur->next_index(); } @@ -319,7 +332,7 @@ bool /* RSHashTable:: */ RSHashTableIter::has_next(size_t& card_index) { _card_ind++; CardIdx_t ci; - if (_card_ind < SparsePRTEntry::CardsPerEntry && + if (_card_ind < SparsePRTEntry::cards_num() && ((ci = _rsht->entry(_bl_ind)->card(_card_ind)) != SparsePRTEntry::NullEntry)) { card_index = compute_card_ind(ci); @@ -359,7 +372,7 @@ size_t RSHashTable::mem_size() const { return sizeof(this) + - capacity() * (sizeof(SparsePRTEntry) + sizeof(int)); + capacity() * (SparsePRTEntry::size() + sizeof(int)); } // ---------------------------------------------------------------------- @@ -446,6 +459,10 @@ return _next->get_cards(region_id, cards); } +SparsePRTEntry* SparsePRT::get_entry(RegionIdx_t region_id) { + return _next->get_entry(region_id); +} + bool SparsePRT::delete_entry(RegionIdx_t region_id) { return _next->delete_entry(region_id); } diff -r 8859772195c6 -r 0414c1049f15 src/share/vm/gc_implementation/g1/sparsePRT.hpp --- a/src/share/vm/gc_implementation/g1/sparsePRT.hpp Tue Feb 09 13:56:09 2010 -0800 +++ b/src/share/vm/gc_implementation/g1/sparsePRT.hpp Thu Feb 11 15:52:19 2010 -0800 @@ -32,21 +32,28 @@ // insertions only enqueue old versions for deletions, but do not delete // old versions synchronously. - class SparsePRTEntry: public CHeapObj { public: - enum SomePublicConstants { - CardsPerEntry = 4, - NullEntry = -1 + NullEntry = -1, + UnrollFactor = 4 }; - private: RegionIdx_t _region_ind; int _next_index; - CardIdx_t _cards[CardsPerEntry]; - + CardIdx_t _cards[1]; + // WARNING: Don't put any data members beyond this line. Card array has, in fact, variable length. + // It should always be the last data member. public: + // Returns the size of the entry, used for entry allocation. + static size_t size() { return sizeof(SparsePRTEntry) + sizeof(CardIdx_t) * (cards_num() - 1); } + // Returns the size of the card array. + static int cards_num() { + // The number of cards should be a multiple of 4, because that's our current + // unrolling factor. + static const int s = MAX2(G1RSetSparseRegionEntries & ~(UnrollFactor - 1), UnrollFactor); + return s; + } // Set the region_ind to the given value, and delete all cards. inline void init(RegionIdx_t region_ind); @@ -134,12 +141,15 @@ bool add_card(RegionIdx_t region_id, CardIdx_t card_index); bool get_cards(RegionIdx_t region_id, CardIdx_t* cards); + bool delete_entry(RegionIdx_t region_id); bool contains_card(RegionIdx_t region_id, CardIdx_t card_index) const; void add_entry(SparsePRTEntry* e); + SparsePRTEntry* get_entry(RegionIdx_t region_id); + void clear(); size_t capacity() const { return _capacity; } @@ -148,7 +158,7 @@ size_t occupied_cards() const { return _occupied_cards; } size_t mem_size() const; - SparsePRTEntry* entry(int i) const { return &_entries[i]; } + SparsePRTEntry* entry(int i) const { return (SparsePRTEntry*)((char*)_entries + SparsePRTEntry::size() * i); } void print(); }; @@ -157,7 +167,7 @@ class RSHashTableIter VALUE_OBJ_CLASS_SPEC { int _tbl_ind; // [-1, 0.._rsht->_capacity) int _bl_ind; // [-1, 0.._rsht->_capacity) - short _card_ind; // [0..CardsPerEntry) + short _card_ind; // [0..SparsePRTEntry::cards_num()) RSHashTable* _rsht; size_t _heap_bot_card_ind; @@ -176,7 +186,7 @@ RSHashTableIter(size_t heap_bot_card_ind) : _tbl_ind(RSHashTable::NullEntry), _bl_ind(RSHashTable::NullEntry), - _card_ind((SparsePRTEntry::CardsPerEntry-1)), + _card_ind((SparsePRTEntry::cards_num() - 1)), _rsht(NULL), _heap_bot_card_ind(heap_bot_card_ind) {} @@ -185,7 +195,7 @@ _rsht = rsht; _tbl_ind = -1; // So that first increment gets to 0. _bl_ind = RSHashTable::NullEntry; - _card_ind = (SparsePRTEntry::CardsPerEntry-1); + _card_ind = (SparsePRTEntry::cards_num() - 1); } bool has_next(size_t& card_index); @@ -241,9 +251,13 @@ // If the table hold an entry for "region_ind", Copies its // cards into "cards", which must be an array of length at least - // "CardsPerEntry", and returns "true"; otherwise, returns "false". + // "SparePRTEntry::cards_num()", and returns "true"; otherwise, + // returns "false". bool get_cards(RegionIdx_t region_ind, CardIdx_t* cards); + // Return the pointer to the entry associated with the given region. + SparsePRTEntry* get_entry(RegionIdx_t region_ind); + // If there is an entry for "region_ind", removes it and return "true"; // otherwise returns "false." bool delete_entry(RegionIdx_t region_ind); diff -r 8859772195c6 -r 0414c1049f15 src/share/vm/memory/cardTableModRefBS.hpp --- a/src/share/vm/memory/cardTableModRefBS.hpp Tue Feb 09 13:56:09 2010 -0800 +++ b/src/share/vm/memory/cardTableModRefBS.hpp Thu Feb 11 15:52:19 2010 -0800 @@ -339,6 +339,16 @@ return (val & (clean_card_mask_val() | claimed_card_val())) == claimed_card_val(); } + void set_card_claimed(size_t card_index) { + jbyte val = _byte_map[card_index]; + if (val == clean_card_val()) { + val = (jbyte)claimed_card_val(); + } else { + val |= (jbyte)claimed_card_val(); + } + _byte_map[card_index] = val; + } + bool claim_card(size_t card_index); bool is_card_clean(size_t card_index) { diff -r 8859772195c6 -r 0414c1049f15 src/share/vm/utilities/globalDefinitions.hpp --- a/src/share/vm/utilities/globalDefinitions.hpp Tue Feb 09 13:56:09 2010 -0800 +++ b/src/share/vm/utilities/globalDefinitions.hpp Thu Feb 11 15:52:19 2010 -0800 @@ -139,6 +139,10 @@ const size_t G = M*K; const size_t HWperKB = K / sizeof(HeapWord); +const size_t LOG_K = 10; +const size_t LOG_M = 2 * LOG_K; +const size_t LOG_G = 2 * LOG_M; + const jint min_jint = (jint)1 << (sizeof(jint)*BitsPerByte-1); // 0x80000000 == smallest jint const jint max_jint = (juint)min_jint - 1; // 0x7FFFFFFF == largest jint