comparison src/share/vm/gc_implementation/g1/g1RemSet.cpp @ 1261:0414c1049f15

6923991: G1: improve scalability of RSet scanning. Summary: Implemented block-based work stealing. Moved copying during the rset scanning phase to the main copying phase. Made the size of the rset table depend on the region size. Reviewed-by: apetrusenko, tonyp
author iveresov
date Thu, 11 Feb 2010 15:52:19 -0800
parents 6cb8e9df7174
children b81f3572f355
comparison
equal deleted inserted replaced
1260:8859772195c6 1261:0414c1049f15
153 G1CollectedHeap* _g1h; 153 G1CollectedHeap* _g1h;
154 OopsInHeapRegionClosure* _oc; 154 OopsInHeapRegionClosure* _oc;
155 G1BlockOffsetSharedArray* _bot_shared; 155 G1BlockOffsetSharedArray* _bot_shared;
156 CardTableModRefBS *_ct_bs; 156 CardTableModRefBS *_ct_bs;
157 int _worker_i; 157 int _worker_i;
158 int _block_size;
158 bool _try_claimed; 159 bool _try_claimed;
159 size_t _min_skip_distance, _max_skip_distance;
160 public: 160 public:
161 ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) : 161 ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) :
162 _oc(oc), 162 _oc(oc),
163 _cards(0), 163 _cards(0),
164 _cards_done(0), 164 _cards_done(0),
166 _try_claimed(false) 166 _try_claimed(false)
167 { 167 {
168 _g1h = G1CollectedHeap::heap(); 168 _g1h = G1CollectedHeap::heap();
169 _bot_shared = _g1h->bot_shared(); 169 _bot_shared = _g1h->bot_shared();
170 _ct_bs = (CardTableModRefBS*) (_g1h->barrier_set()); 170 _ct_bs = (CardTableModRefBS*) (_g1h->barrier_set());
171 _min_skip_distance = 16; 171 _block_size = MAX2<int>(G1RSetScanBlockSize, 1);
172 _max_skip_distance = 2 * _g1h->n_par_threads() * _min_skip_distance;
173 } 172 }
174 173
175 void set_try_claimed() { _try_claimed = true; } 174 void set_try_claimed() { _try_claimed = true; }
176 175
177 void scanCard(size_t index, HeapRegion *r) { 176 void scanCard(size_t index, HeapRegion *r) {
223 // is true: either we're supposed to work on claimed-but-not-complete 222 // is true: either we're supposed to work on claimed-but-not-complete
224 // regions, or we successfully claimed the region. 223 // regions, or we successfully claimed the region.
225 HeapRegionRemSetIterator* iter = _g1h->rem_set_iterator(_worker_i); 224 HeapRegionRemSetIterator* iter = _g1h->rem_set_iterator(_worker_i);
226 hrrs->init_iterator(iter); 225 hrrs->init_iterator(iter);
227 size_t card_index; 226 size_t card_index;
228 size_t skip_distance = 0, current_card = 0, jump_to_card = 0; 227
229 while (iter->has_next(card_index)) { 228 // We claim cards in blocks so as to reduce the contention. The block size is determined by
230 if (current_card < jump_to_card) { 229 // the G1RSetScanBlockSize parameter.
231 ++current_card; 230 size_t jump_to_card = hrrs->iter_claimed_next(_block_size);
232 continue; 231 for (size_t current_card = 0; iter->has_next(card_index); current_card++) {
232 if (current_card >= jump_to_card + _block_size) {
233 jump_to_card = hrrs->iter_claimed_next(_block_size);
233 } 234 }
235 if (current_card < jump_to_card) continue;
234 HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index); 236 HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index);
235 #if 0 237 #if 0
236 gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n", 238 gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n",
237 card_start, card_start + CardTableModRefBS::card_size_in_words); 239 card_start, card_start + CardTableModRefBS::card_size_in_words);
238 #endif 240 #endif
245 _g1h->push_dirty_cards_region(card_region); 247 _g1h->push_dirty_cards_region(card_region);
246 } 248 }
247 249
248 // If the card is dirty, then we will scan it during updateRS. 250 // If the card is dirty, then we will scan it during updateRS.
249 if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) { 251 if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) {
250 if (!_ct_bs->is_card_claimed(card_index) && _ct_bs->claim_card(card_index)) { 252 // We mark the card as "claimed" lazily (so races are possible but they're benign),
251 scanCard(card_index, card_region); 253 // which reduces the number of duplicate scans (the rsets of the regions in the cset
252 } else if (_try_claimed) { 254 // can intersect).
253 if (jump_to_card == 0 || jump_to_card != current_card) { 255 if (!_ct_bs->is_card_claimed(card_index)) {
254 // We did some useful work in the previous iteration. 256 _ct_bs->set_card_claimed(card_index);
255 // Decrease the distance. 257 scanCard(card_index, card_region);
256 skip_distance = MAX2(skip_distance >> 1, _min_skip_distance); 258 }
257 } else {
258 // Previous iteration resulted in a claim failure.
259 // Increase the distance.
260 skip_distance = MIN2(skip_distance << 1, _max_skip_distance);
261 }
262 jump_to_card = current_card + skip_distance;
263 }
264 } 259 }
265 ++current_card;
266 } 260 }
267 if (!_try_claimed) { 261 if (!_try_claimed) {
268 hrrs->set_iter_complete(); 262 hrrs->set_iter_complete();
269 } 263 }
270 return false; 264 return false;
297 291
298 void HRInto_G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) { 292 void HRInto_G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) {
299 double rs_time_start = os::elapsedTime(); 293 double rs_time_start = os::elapsedTime();
300 HeapRegion *startRegion = calculateStartRegion(worker_i); 294 HeapRegion *startRegion = calculateStartRegion(worker_i);
301 295
302 BufferingOopsInHeapRegionClosure boc(oc); 296 ScanRSClosure scanRScl(oc, worker_i);
303 ScanRSClosure scanRScl(&boc, worker_i);
304 _g1->collection_set_iterate_from(startRegion, &scanRScl); 297 _g1->collection_set_iterate_from(startRegion, &scanRScl);
305 scanRScl.set_try_claimed(); 298 scanRScl.set_try_claimed();
306 _g1->collection_set_iterate_from(startRegion, &scanRScl); 299 _g1->collection_set_iterate_from(startRegion, &scanRScl);
307 300
308 boc.done(); 301 double scan_rs_time_sec = os::elapsedTime() - rs_time_start;
309 double closure_app_time_sec = boc.closure_app_seconds();
310 double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) -
311 closure_app_time_sec;
312 double closure_app_time_ms = closure_app_time_sec * 1000.0;
313 302
314 assert( _cards_scanned != NULL, "invariant" ); 303 assert( _cards_scanned != NULL, "invariant" );
315 _cards_scanned[worker_i] = scanRScl.cards_done(); 304 _cards_scanned[worker_i] = scanRScl.cards_done();
316 305
317 _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0); 306 _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0);
318 _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0); 307 _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
319
320 double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i);
321 if (scan_new_refs_time_ms > 0.0) {
322 closure_app_time_ms += scan_new_refs_time_ms;
323 }
324
325 _g1p->record_obj_copy_time(worker_i, closure_app_time_ms);
326 } 308 }
327 309
328 void HRInto_G1RemSet::updateRS(int worker_i) { 310 void HRInto_G1RemSet::updateRS(int worker_i) {
329 ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine(); 311 ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
330 312
447 // If "p" has already been processed concurrently, this is 429 // If "p" has already been processed concurrently, this is
448 // idempotent. 430 // idempotent.
449 oc->do_oop(p); 431 oc->do_oop(p);
450 } 432 }
451 } 433 }
452 _g1p->record_scan_new_refs_time(worker_i, 434 double scan_new_refs_time_ms = (os::elapsedTime() - scan_new_refs_start_sec) * 1000.0;
453 (os::elapsedTime() - scan_new_refs_start_sec) 435 _g1p->record_scan_new_refs_time(worker_i, scan_new_refs_time_ms);
454 * 1000.0);
455 } 436 }
456 437
457 void HRInto_G1RemSet::cleanupHRRS() { 438 void HRInto_G1RemSet::cleanupHRRS() {
458 HeapRegionRemSet::cleanup(); 439 HeapRegionRemSet::cleanup();
459 } 440 }