comparison src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp @ 889:15c5903cf9e1

6865703: G1: Parallelize hot card cache cleanup
Summary: Have the GC worker threads clear the hot card cache in parallel by having each worker thread claim a chunk of the card cache and process the cards in that chunk. The size of the chunks that each thread will claim is determined at VM initialization from the size of the card cache and the number of worker threads.
Reviewed-by: jmasa, tonyp
author johnc
date Mon, 03 Aug 2009 12:59:30 -0700
parents bd02caa94611
children 6cb8e9df7174
comparing 888:59726d16b30d with 889:15c5903cf9e1
@@ -55,12 +55,12 @@
   }
   return 0;
 }
 
 void ConcurrentG1Refine::init() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
   if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
-    G1CollectedHeap* g1h = G1CollectedHeap::heap();
     _n_card_counts =
       (unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);
     _card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts);
     for (size_t i = 0; i < _n_card_counts; i++) _card_counts[i] = 0;
     ModRefBarrierSet* bs = g1h->mr_bs();
@@ -81,10 +81,16 @@
     _use_cache = true;
     _hot_cache_size = (1 << G1ConcRSLogCacheSize);
     _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
     _n_hot = 0;
     _hot_cache_idx = 0;
+
+    // For refining the cards in the hot cache in parallel
+    int n_workers = (ParallelGCThreads > 0 ?
+                     g1h->workers()->total_workers() : 1);
+    _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
+    _hot_cache_par_claimed_idx = 0;
   }
 }
 
 void ConcurrentG1Refine::stop() {
   if (_threads != NULL) {
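The hunk above sizes the per-worker chunk by dividing the hot cache evenly among the workers, with a floor of one card so the chunk size stays positive even when the cache is smaller than the worker count. A minimal standalone sketch of that computation; the MAX2 macro mirrors the HotSpot one, and the concrete values are illustrative examples, not defaults taken from the sources:

    #include <stdio.h>

    #define MAX2(a, b) ((a) > (b) ? (a) : (b))

    int main() {
      int log_cache_size = 10;                  // example stand-in for G1ConcRSLogCacheSize
      int hot_cache_size = 1 << log_cache_size; // 1024 cache entries
      int n_workers = 8;                        // example total GC worker threads
      // Floor of 1: never hand a worker an empty chunk.
      int chunk_size = MAX2(1, hot_cache_size / n_workers);
      printf("parallel chunk size = %d\n", chunk_size); // prints 128
      return 0;
    }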
@@ -159,21 +165,27 @@
 }
 
 
 void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
   assert(!use_cache(), "cache should be disabled");
-  int start_ind = _hot_cache_idx-1;
-  for (int i = 0; i < _n_hot; i++) {
-    int ind = start_ind - i;
-    if (ind < 0) ind = ind + _hot_cache_size;
-    jbyte* entry = _hot_cache[ind];
-    if (entry != NULL) {
-      g1rs->concurrentRefineOneCard(entry, worker_i);
+  int start_idx;
+
+  while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
+    int end_idx = start_idx + _hot_cache_par_chunk_size;
+
+    if (start_idx ==
+        Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
+      // The current worker has successfully claimed the chunk [start_idx..end_idx)
+      end_idx = MIN2(end_idx, _n_hot);
+      for (int i = start_idx; i < end_idx; i++) {
+        jbyte* entry = _hot_cache[i];
+        if (entry != NULL) {
+          g1rs->concurrentRefineOneCard(entry, worker_i);
+        }
+      }
     }
   }
-  _n_hot = 0;
-  _hot_cache_idx = 0;
 }
 
 void ConcurrentG1Refine::clear_and_record_card_counts() {
   if (G1ConcRSLogCacheSize == 0 && !G1ConcRSCountTraversals) return;
   _n_periods++;
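The claiming protocol in the new clean_up_cache() can be read in isolation: each worker snapshots the shared claim index, tries to advance it by one chunk with a compare-and-swap, and processes the claimed range only if its CAS wins. A simplified self-contained sketch of that pattern, using C++11 std::atomic in place of HotSpot's Atomic::cmpxchg (the original code predates C++11 atomics; all names below are illustrative stand-ins):

    #include <atomic>

    const int n_cards    = 1024;     // stands in for _n_hot
    const int chunk_size = 128;      // stands in for _hot_cache_par_chunk_size
    std::atomic<int> claimed_idx(0); // stands in for _hot_cache_par_claimed_idx

    // Run concurrently by every worker; each chunk [start_idx, end_idx)
    // is processed by exactly one thread, the one whose CAS succeeds.
    void clean_up_cache_sketch(void (*refine_one_card)(int card)) {
      int start_idx;
      while ((start_idx = claimed_idx.load()) < n_cards) {
        int end_idx = start_idx + chunk_size;
        // compare_exchange_strong plays the role of Atomic::cmpxchg: it
        // advances claimed_idx to end_idx only if it still equals start_idx.
        if (claimed_idx.compare_exchange_strong(start_idx, end_idx)) {
          if (end_idx > n_cards) end_idx = n_cards; // clip the final chunk
          for (int i = start_idx; i < end_idx; i++) {
            refine_one_card(i);
          }
        }
        // A failed CAS means another worker claimed this chunk; loop
        // around and re-read the updated index.
      }
    }

Because the claim index only ever moves forward, a worker that arrives after the index has reached the end of the cache simply falls out of the loop, so any number of workers can share the cleanup without further coordination.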