# HG changeset patch # User johnc # Date 1342553045 25200 # Node ID db823a892a5575c0d464ac58f42500124bbb2ef8 # Parent d42fe3c3001d77c7fd9ca7481445b1cd3171a34b 7182260: G1: Fine grain RSet freeing bottleneck Summary: Chain the fine grain PerRegionTables in an individual RSet together and free them in bulk using a single operation. Reviewed-by: johnc, brutisso Contributed-by: Thomas Schatzl diff -r d42fe3c3001d -r db823a892a55 src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Tue Jul 17 14:57:02 2012 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Tue Jul 17 12:24:05 2012 -0700 @@ -34,8 +34,6 @@ #include "utilities/bitMap.inline.hpp" #include "utilities/globalDefinitions.hpp" -// OtherRegionsTable - class PerRegionTable: public CHeapObj { friend class OtherRegionsTable; friend class HeapRegionRemSetIterator; @@ -44,19 +42,17 @@ BitMap _bm; jint _occupied; - // next pointer for free/allocated lis + // next pointer for free/allocated 'all' list PerRegionTable* _next; - static PerRegionTable* _free_list; + // prev pointer for the allocated 'all' list + PerRegionTable* _prev; -#ifdef _MSC_VER - // For some reason even though the classes are marked as friend they are unable - // to access CardsPerRegion when private/protected. Only the windows c++ compiler - // says this Sun CC and linux gcc don't have a problem with access when private + // next pointer in collision list + PerRegionTable * _collision_list_next; - public: - -#endif // _MSC_VER + // Global free list of PRTs + static PerRegionTable* _free_list; protected: // We need access in order to union things into the base table. @@ -69,7 +65,8 @@ PerRegionTable(HeapRegion* hr) : _hr(hr), _occupied(0), - _bm(HeapRegion::CardsPerRegion, false /* in-resource-area */) + _bm(HeapRegion::CardsPerRegion, false /* in-resource-area */), + _collision_list_next(NULL), _next(NULL), _prev(NULL) {} void add_card_work(CardIdx_t from_card, bool par) { @@ -126,9 +123,13 @@ return _occupied; } - void init(HeapRegion* hr) { + void init(HeapRegion* hr, bool clear_links_to_all_list) { + if (clear_links_to_all_list) { + set_next(NULL); + set_prev(NULL); + } _hr = hr; - _next = NULL; + _collision_list_next = NULL; _occupied = 0; _bm.clear(); } @@ -175,22 +176,25 @@ return _bm.at(card_ind); } - PerRegionTable* next() const { return _next; } - void set_next(PerRegionTable* nxt) { _next = nxt; } - PerRegionTable** next_addr() { return &_next; } - - static void free(PerRegionTable* prt) { + // Bulk-free the PRTs from prt to last, assumes that they are + // linked together using their _next field. + static void bulk_free(PerRegionTable* prt, PerRegionTable* last) { while (true) { PerRegionTable* fl = _free_list; - prt->set_next(fl); - PerRegionTable* res = - (PerRegionTable*) - Atomic::cmpxchg_ptr(prt, &_free_list, fl); - if (res == fl) return; + last->set_next(fl); + PerRegionTable* res = (PerRegionTable*) Atomic::cmpxchg_ptr(prt, &_free_list, fl); + if (res == fl) { + return; + } } ShouldNotReachHere(); } + static void free(PerRegionTable* prt) { + bulk_free(prt, prt); + } + + // Returns an initialized PerRegionTable instance. static PerRegionTable* alloc(HeapRegion* hr) { PerRegionTable* fl = _free_list; while (fl != NULL) { @@ -199,7 +203,7 @@ (PerRegionTable*) Atomic::cmpxchg_ptr(nxt, &_free_list, fl); if (res == fl) { - fl->init(hr); + fl->init(hr, true); return fl; } else { fl = _free_list; @@ -209,6 +213,31 @@ return new PerRegionTable(hr); } + PerRegionTable* next() const { return _next; } + void set_next(PerRegionTable* next) { _next = next; } + PerRegionTable* prev() const { return _prev; } + void set_prev(PerRegionTable* prev) { _prev = prev; } + + // Accessor and Modification routines for the pointer for the + // singly linked collision list that links the PRTs within the + // OtherRegionsTable::_fine_grain_regions hash table. + // + // It might be useful to also make the collision list doubly linked + // to avoid iteration over the collisions list during scrubbing/deletion. + // OTOH there might not be many collisions. + + PerRegionTable* collision_list_next() const { + return _collision_list_next; + } + + void set_collision_list_next(PerRegionTable* next) { + _collision_list_next = next; + } + + PerRegionTable** collision_list_next_addr() { + return &_collision_list_next; + } + static size_t fl_mem_size() { PerRegionTable* cur = _free_list; size_t res = 0; @@ -234,6 +263,7 @@ _coarse_map(G1CollectedHeap::heap()->max_regions(), false /* in-resource-area */), _fine_grain_regions(NULL), + _first_all_fine_prts(NULL), _last_all_fine_prts(NULL), _n_fine_entries(0), _n_coarse_entries(0), _fine_eviction_start(0), _sparse_table(hr) @@ -264,6 +294,66 @@ } } +void OtherRegionsTable::link_to_all(PerRegionTable* prt) { + // We always append to the beginning of the list for convenience; + // the order of entries in this list does not matter. + if (_first_all_fine_prts != NULL) { + assert(_first_all_fine_prts->prev() == NULL, "invariant"); + _first_all_fine_prts->set_prev(prt); + prt->set_next(_first_all_fine_prts); + } else { + // this is the first element we insert. Adjust the "last" pointer + _last_all_fine_prts = prt; + assert(prt->next() == NULL, "just checking"); + } + // the new element is always the first element without a predecessor + prt->set_prev(NULL); + _first_all_fine_prts = prt; + + assert(prt->prev() == NULL, "just checking"); + assert(_first_all_fine_prts == prt, "just checking"); + assert((_first_all_fine_prts == NULL && _last_all_fine_prts == NULL) || + (_first_all_fine_prts != NULL && _last_all_fine_prts != NULL), + "just checking"); + assert(_last_all_fine_prts == NULL || _last_all_fine_prts->next() == NULL, + "just checking"); + assert(_first_all_fine_prts == NULL || _first_all_fine_prts->prev() == NULL, + "just checking"); +} + +void OtherRegionsTable::unlink_from_all(PerRegionTable* prt) { + if (prt->prev() != NULL) { + assert(_first_all_fine_prts != prt, "just checking"); + prt->prev()->set_next(prt->next()); + // removing the last element in the list? + if (_last_all_fine_prts == prt) { + _last_all_fine_prts = prt->prev(); + } + } else { + assert(_first_all_fine_prts == prt, "just checking"); + _first_all_fine_prts = prt->next(); + // list is empty now? + if (_first_all_fine_prts == NULL) { + _last_all_fine_prts = NULL; + } + } + + if (prt->next() != NULL) { + prt->next()->set_prev(prt->prev()); + } + + prt->set_next(NULL); + prt->set_prev(NULL); + + assert((_first_all_fine_prts == NULL && _last_all_fine_prts == NULL) || + (_first_all_fine_prts != NULL && _last_all_fine_prts != NULL), + "just checking"); + assert(_last_all_fine_prts == NULL || _last_all_fine_prts->next() == NULL, + "just checking"); + assert(_first_all_fine_prts == NULL || _first_all_fine_prts->prev() == NULL, + "just checking"); +} + int** OtherRegionsTable::_from_card_cache = NULL; size_t OtherRegionsTable::_from_card_cache_max_regions = 0; size_t OtherRegionsTable::_from_card_cache_mem_size = 0; @@ -386,13 +476,16 @@ if (_n_fine_entries == _max_fine_entries) { prt = delete_region_table(); + // There is no need to clear the links to the 'all' list here: + // prt will be reused immediately, i.e. remain in the 'all' list. + prt->init(from_hr, false /* clear_links_to_all_list */); } else { prt = PerRegionTable::alloc(from_hr); + link_to_all(prt); } - prt->init(from_hr); PerRegionTable* first_prt = _fine_grain_regions[ind]; - prt->set_next(first_prt); // XXX Maybe move to init? + prt->set_collision_list_next(first_prt); _fine_grain_regions[ind] = prt; _n_fine_entries++; @@ -438,7 +531,7 @@ assert(0 <= ind && ind < _max_fine_entries, "Preconditions."); PerRegionTable* prt = _fine_grain_regions[ind]; while (prt != NULL && prt->hr() != hr) { - prt = prt->next(); + prt = prt->collision_list_next(); } // Loop postcondition is the method postcondition. return prt; @@ -473,8 +566,8 @@ max_ind = i; max_occ = cur_occ; } - prev = cur->next_addr(); - cur = cur->next(); + prev = cur->collision_list_next_addr(); + cur = cur->collision_list_next(); } i = i + _fine_eviction_stride; if (i >= _n_fine_entries) i = i - _n_fine_entries; @@ -503,7 +596,7 @@ } // Unsplice. - *max_prev = max->next(); + *max_prev = max->collision_list_next(); Atomic::inc(&_n_coarsenings); _n_fine_entries--; return max; @@ -534,7 +627,7 @@ PerRegionTable* cur = _fine_grain_regions[i]; PerRegionTable** prev = &_fine_grain_regions[i]; while (cur != NULL) { - PerRegionTable* nxt = cur->next(); + PerRegionTable* nxt = cur->collision_list_next(); // If the entire region is dead, eliminate. if (G1RSScrubVerbose) { gclog_or_tty->print_cr(" For other region %u:", @@ -542,11 +635,12 @@ } if (!region_bm->at((size_t) cur->hr()->hrs_index())) { *prev = nxt; - cur->set_next(NULL); + cur->set_collision_list_next(NULL); _n_fine_entries--; if (G1RSScrubVerbose) { gclog_or_tty->print_cr(" deleted via region map."); } + unlink_from_all(cur); PerRegionTable::free(cur); } else { // Do fine-grain elimination. @@ -560,11 +654,12 @@ // Did that empty the table completely? if (cur->occupied() == 0) { *prev = nxt; - cur->set_next(NULL); + cur->set_collision_list_next(NULL); _n_fine_entries--; + unlink_from_all(cur); PerRegionTable::free(cur); } else { - prev = cur->next_addr(); + prev = cur->collision_list_next_addr(); } } cur = nxt; @@ -587,13 +682,15 @@ size_t OtherRegionsTable::occ_fine() const { size_t sum = 0; - for (size_t i = 0; i < _max_fine_entries; i++) { - PerRegionTable* cur = _fine_grain_regions[i]; - while (cur != NULL) { - sum += cur->occupied(); - cur = cur->next(); - } + + size_t num = 0; + PerRegionTable * cur = _first_all_fine_prts; + while (cur != NULL) { + sum += cur->occupied(); + cur = cur->next(); + num++; } + guarantee(num == _n_fine_entries, "just checking"); return sum; } @@ -609,12 +706,10 @@ // Cast away const in this case. MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag); size_t sum = 0; - for (size_t i = 0; i < _max_fine_entries; i++) { - PerRegionTable* cur = _fine_grain_regions[i]; - while (cur != NULL) { - sum += cur->mem_size(); - cur = cur->next(); - } + PerRegionTable * cur = _first_all_fine_prts; + while (cur != NULL) { + sum += cur->mem_size(); + cur = cur->next(); } sum += (sizeof(PerRegionTable*) * _max_fine_entries); sum += (_coarse_map.size_in_words() * HeapWordSize); @@ -632,22 +727,24 @@ } void OtherRegionsTable::clear_fcc() { + size_t hrs_idx = hr()->hrs_index(); for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { - _from_card_cache[i][hr()->hrs_index()] = -1; + _from_card_cache[i][hrs_idx] = -1; } } void OtherRegionsTable::clear() { MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); - for (size_t i = 0; i < _max_fine_entries; i++) { - PerRegionTable* cur = _fine_grain_regions[i]; - while (cur != NULL) { - PerRegionTable* nxt = cur->next(); - PerRegionTable::free(cur); - cur = nxt; - } - _fine_grain_regions[i] = NULL; + // if there are no entries, skip this step + if (_first_all_fine_prts != NULL) { + guarantee(_first_all_fine_prts != NULL && _last_all_fine_prts != NULL, "just checking"); + PerRegionTable::bulk_free(_first_all_fine_prts, _last_all_fine_prts); + memset(_fine_grain_regions, 0, _max_fine_entries * sizeof(_fine_grain_regions[0])); + } else { + guarantee(_first_all_fine_prts == NULL && _last_all_fine_prts == NULL, "just checking"); } + + _first_all_fine_prts = _last_all_fine_prts = NULL; _sparse_table.clear(); _coarse_map.clear(); _n_fine_entries = 0; @@ -686,12 +783,13 @@ PerRegionTable** prev_addr = &_fine_grain_regions[ind]; PerRegionTable* prt = *prev_addr; while (prt != NULL && prt->hr() != hr) { - prev_addr = prt->next_addr(); - prt = prt->next(); + prev_addr = prt->collision_list_next_addr(); + prt = prt->collision_list_next(); } if (prt != NULL) { assert(prt->hr() == hr, "Loop postcondition."); - *prev_addr = prt->next(); + *prev_addr = prt->collision_list_next(); + unlink_from_all(prt); PerRegionTable::free(prt); _n_fine_entries--; return true; @@ -793,7 +891,6 @@ G1CollectedHeap::heap()->bot_shared()->address_for_index(card_index); gclog_or_tty->print_cr(" Card " PTR_FORMAT, card_start); } - if (iter.n_yielded() != occupied()) { gclog_or_tty->print_cr("Yielded disagrees with occupied:"); gclog_or_tty->print_cr(" %6d yielded (%6d coarse, %6d fine).", @@ -905,7 +1002,7 @@ while (!fine_has_next()) { if (_cur_region_cur_card == (size_t) HeapRegion::CardsPerRegion) { _cur_region_cur_card = 0; - _fine_cur_prt = _fine_cur_prt->next(); + _fine_cur_prt = _fine_cur_prt->collision_list_next(); } if (_fine_cur_prt == NULL) { fine_find_next_non_null_prt(); diff -r d42fe3c3001d -r db823a892a55 src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Tue Jul 17 14:57:02 2012 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Tue Jul 17 12:24:05 2012 -0700 @@ -82,6 +82,14 @@ PerRegionTable** _fine_grain_regions; size_t _n_fine_entries; + // The fine grain remembered sets are doubly linked together using + // their 'next' and 'prev' fields. + // This allows fast bulk freeing of all the fine grain remembered + // set entries, and fast finding of all of them without iterating + // over the _fine_grain_regions table. + PerRegionTable * _first_all_fine_prts; + PerRegionTable * _last_all_fine_prts; + // Used to sample a subset of the fine grain PRTs to determine which // PRT to evict and coarsen. size_t _fine_eviction_start; @@ -114,6 +122,11 @@ static size_t _from_card_cache_max_regions; static size_t _from_card_cache_mem_size; + // link/add the given fine grain remembered set into the "all" list + void link_to_all(PerRegionTable * prt); + // unlink/remove the given fine grain remembered set into the "all" list + void unlink_from_all(PerRegionTable * prt); + public: OtherRegionsTable(HeapRegion* hr);