Mercurial > hg > truffle
annotate src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp @ 889:15c5903cf9e1
6865703: G1: Parallelize hot card cache cleanup
Summary: Have the GC worker threads clear the hot card cache in parallel by having each worker thread claim a chunk of the card cache and process the cards in that chunk. The size of the chunks that each thread will claim is determined at VM initialization from the size of the card cache and the number of worker threads.
Reviewed-by: jmasa, tonyp
author | johnc |
---|---|
date | Mon, 03 Aug 2009 12:59:30 -0700 |
parents | bd02caa94611 |
children | 6cb8e9df7174 |
rev | line source |
---|---|
342 | 1 /* |
844 | 2 * Copyright 2001-2009 Sun Microsystems, Inc. All Rights Reserved. |
342 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
20 * CA 95054 USA or visit www.sun.com if you need additional information or | |
21 * have any questions. | |
22 * | |
23 */ | |
24 | |
25 #include "incls/_precompiled.incl" | |
26 #include "incls/_concurrentG1Refine.cpp.incl" | |
27 | |
28 ConcurrentG1Refine::ConcurrentG1Refine() : | |
29 _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL), | |
30 _hot_cache(NULL), | |
31 _def_use_cache(false), _use_cache(false), | |
794 | 32 _n_periods(0), _total_cards(0), _total_travs(0), |
33 _threads(NULL), _n_threads(0) | |
342 | 34 { |
35 if (G1ConcRefine) { | |
795
215f81b4d9b3
6841831: G1: assert(contains_reference(from),"We just added it!") fires
iveresov
parents:
794
diff
changeset
|
36 _n_threads = (int)thread_num(); |
794 | 37 if (_n_threads > 0) { |
38 _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads); | |
795
215f81b4d9b3
6841831: G1: assert(contains_reference(from),"We just added it!") fires
iveresov
parents:
794
diff
changeset
|
39 int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids(); |
794 | 40 ConcurrentG1RefineThread *next = NULL; |
41 for (int i = _n_threads - 1; i >= 0; i--) { | |
795
215f81b4d9b3
6841831: G1: assert(contains_reference(from),"We just added it!") fires
iveresov
parents:
794
diff
changeset
|
42 ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i); |
794 | 43 assert(t != NULL, "Conc refine should have been created"); |
44 assert(t->cg1r() == this, "Conc refine thread should refer to this"); | |
45 _threads[i] = t; | |
46 next = t; | |
47 } | |
48 } | |
342 | 49 } |
50 } | |
51 | |
795
215f81b4d9b3
6841831: G1: assert(contains_reference(from),"We just added it!") fires
iveresov
parents:
794
diff
changeset
|
52 size_t ConcurrentG1Refine::thread_num() { |
215f81b4d9b3
6841831: G1: assert(contains_reference(from),"We just added it!") fires
iveresov
parents:
794
diff
changeset
|
53 if (G1ConcRefine) { |
215f81b4d9b3
6841831: G1: assert(contains_reference(from),"We just added it!") fires
iveresov
parents:
794
diff
changeset
|
54 return (G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads; |
215f81b4d9b3
6841831: G1: assert(contains_reference(from),"We just added it!") fires
iveresov
parents:
794
diff
changeset
|
55 } |
215f81b4d9b3
6841831: G1: assert(contains_reference(from),"We just added it!") fires
iveresov
parents:
794
diff
changeset
|
56 return 0; |
215f81b4d9b3
6841831: G1: assert(contains_reference(from),"We just added it!") fires
iveresov
parents:
794
diff
changeset
|
57 } |
215f81b4d9b3
6841831: G1: assert(contains_reference(from),"We just added it!") fires
iveresov
parents:
794
diff
changeset
|
58 |
342 | 59 void ConcurrentG1Refine::init() { |
889 | 60 G1CollectedHeap* g1h = G1CollectedHeap::heap(); |
342 | 61 if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { |
62 _n_card_counts = | |
63 (unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift); | |
64 _card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts); | |
65 for (size_t i = 0; i < _n_card_counts; i++) _card_counts[i] = 0; | |
66 ModRefBarrierSet* bs = g1h->mr_bs(); | |
67 guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition"); | |
68 CardTableModRefBS* ctbs = (CardTableModRefBS*)bs; | |
69 _ct_bot = ctbs->byte_for_const(g1h->reserved_region().start()); | |
70 if (G1ConcRSCountTraversals) { | |
71 _cur_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256); | |
72 _cum_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256); | |
73 for (int i = 0; i < 256; i++) { | |
74 _cur_card_count_histo[i] = 0; | |
75 _cum_card_count_histo[i] = 0; | |
76 } | |
77 } | |
78 } | |
79 if (G1ConcRSLogCacheSize > 0) { | |
80 _def_use_cache = true; | |
81 _use_cache = true; | |
82 _hot_cache_size = (1 << G1ConcRSLogCacheSize); | |
83 _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size); | |
84 _n_hot = 0; | |
85 _hot_cache_idx = 0; | |
889 | 86 |
87 // For refining the cards in the hot cache in parallel | |
88 int n_workers = (ParallelGCThreads > 0 ? | |
89 g1h->workers()->total_workers() : 1); | |
90 _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers); | |
91 _hot_cache_par_claimed_idx = 0; | |
342 | 92 } |
93 } | |
94 | |
794 | 95 void ConcurrentG1Refine::stop() { |
96 if (_threads != NULL) { | |
97 for (int i = 0; i < _n_threads; i++) { | |
98 _threads[i]->stop(); | |
99 } | |
100 } | |
101 } | |
102 | |
342 | 103 ConcurrentG1Refine::~ConcurrentG1Refine() { |
104 if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { | |
105 assert(_card_counts != NULL, "Logic"); | |
106 FREE_C_HEAP_ARRAY(unsigned char, _card_counts); | |
107 assert(_cur_card_count_histo != NULL, "Logic"); | |
108 FREE_C_HEAP_ARRAY(unsigned, _cur_card_count_histo); | |
109 assert(_cum_card_count_histo != NULL, "Logic"); | |
110 FREE_C_HEAP_ARRAY(unsigned, _cum_card_count_histo); | |
111 } | |
112 if (G1ConcRSLogCacheSize > 0) { | |
113 assert(_hot_cache != NULL, "Logic"); | |
114 FREE_C_HEAP_ARRAY(jbyte*, _hot_cache); | |
115 } | |
794 | 116 if (_threads != NULL) { |
117 for (int i = 0; i < _n_threads; i++) { | |
118 delete _threads[i]; | |
119 } | |
799
f89cf529c3c7
6849122: G1: Typo introduced during implementation of the parallel refinement
iveresov
parents:
795
diff
changeset
|
120 FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads); |
342 | 121 } |
122 } | |
123 | |
794 | 124 void ConcurrentG1Refine::threads_do(ThreadClosure *tc) { |
125 if (_threads != NULL) { | |
126 for (int i = 0; i < _n_threads; i++) { | |
127 tc->do_thread(_threads[i]); | |
128 } | |
342 | 129 } |
130 } | |
131 | |
132 | |
133 int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) { | |
134 size_t card_num = (card_ptr - _ct_bot); | |
135 guarantee(0 <= card_num && card_num < _n_card_counts, "Bounds"); | |
136 unsigned char cnt = _card_counts[card_num]; | |
137 if (cnt < 255) _card_counts[card_num]++; | |
138 return cnt; | |
139 _total_travs++; | |
140 } | |
141 | |
142 jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) { | |
143 int count = add_card_count(card_ptr); | |
144 // Count previously unvisited cards. | |
145 if (count == 0) _total_cards++; | |
146 // We'll assume a traversal unless we store it in the cache. | |
147 if (count < G1ConcRSHotCardLimit) { | |
148 _total_travs++; | |
149 return card_ptr; | |
150 } | |
151 // Otherwise, it's hot. | |
152 jbyte* res = NULL; | |
153 MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag); | |
154 if (_n_hot == _hot_cache_size) { | |
155 _total_travs++; | |
156 res = _hot_cache[_hot_cache_idx]; | |
157 _n_hot--; | |
158 } | |
159 // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx. | |
160 _hot_cache[_hot_cache_idx] = card_ptr; | |
161 _hot_cache_idx++; | |
162 if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0; | |
163 _n_hot++; | |
164 return res; | |
165 } | |
166 | |
167 | |
168 void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) { | |
169 assert(!use_cache(), "cache should be disabled"); | |
889 | 170 int start_idx; |
171 | |
172 while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once | |
173 int end_idx = start_idx + _hot_cache_par_chunk_size; | |
174 | |
175 if (start_idx == | |
176 Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) { | |
177 // The current worker has successfully claimed the chunk [start_idx..end_idx) | |
178 end_idx = MIN2(end_idx, _n_hot); | |
179 for (int i = start_idx; i < end_idx; i++) { | |
180 jbyte* entry = _hot_cache[i]; | |
181 if (entry != NULL) { | |
182 g1rs->concurrentRefineOneCard(entry, worker_i); | |
183 } | |
184 } | |
342 | 185 } |
186 } | |
187 } | |
188 | |
189 void ConcurrentG1Refine::clear_and_record_card_counts() { | |
190 if (G1ConcRSLogCacheSize == 0 && !G1ConcRSCountTraversals) return; | |
191 _n_periods++; | |
192 if (G1ConcRSCountTraversals) { | |
193 for (size_t i = 0; i < _n_card_counts; i++) { | |
194 unsigned char bucket = _card_counts[i]; | |
195 _cur_card_count_histo[bucket]++; | |
196 _card_counts[i] = 0; | |
197 } | |
198 gclog_or_tty->print_cr("Card counts:"); | |
199 for (int i = 0; i < 256; i++) { | |
200 if (_cur_card_count_histo[i] > 0) { | |
201 gclog_or_tty->print_cr(" %3d: %9d", i, _cur_card_count_histo[i]); | |
202 _cum_card_count_histo[i] += _cur_card_count_histo[i]; | |
203 _cur_card_count_histo[i] = 0; | |
204 } | |
205 } | |
206 } else { | |
207 assert(G1ConcRSLogCacheSize > 0, "Logic"); | |
208 Copy::fill_to_words((HeapWord*)(&_card_counts[0]), | |
209 _n_card_counts / HeapWordSize); | |
210 } | |
211 } | |
212 | |
213 void | |
214 ConcurrentG1Refine:: | |
215 print_card_count_histo_range(unsigned* histo, int from, int to, | |
216 float& cum_card_pct, | |
217 float& cum_travs_pct) { | |
218 unsigned cards = 0; | |
219 unsigned travs = 0; | |
220 guarantee(to <= 256, "Precondition"); | |
221 for (int i = from; i < to-1; i++) { | |
222 cards += histo[i]; | |
223 travs += histo[i] * i; | |
224 } | |
225 if (to == 256) { | |
226 unsigned histo_card_sum = 0; | |
227 unsigned histo_trav_sum = 0; | |
228 for (int i = 1; i < 255; i++) { | |
229 histo_trav_sum += histo[i] * i; | |
230 } | |
231 cards += histo[255]; | |
232 // correct traversals for the last one. | |
233 unsigned travs_255 = (unsigned) (_total_travs - histo_trav_sum); | |
234 travs += travs_255; | |
235 | |
236 } else { | |
237 cards += histo[to-1]; | |
238 travs += histo[to-1] * (to-1); | |
239 } | |
240 float fperiods = (float)_n_periods; | |
241 float f_tot_cards = (float)_total_cards/fperiods; | |
242 float f_tot_travs = (float)_total_travs/fperiods; | |
243 if (cards > 0) { | |
244 float fcards = (float)cards/fperiods; | |
245 float ftravs = (float)travs/fperiods; | |
246 if (to == 256) { | |
247 gclog_or_tty->print(" %4d- %10.2f%10.2f", from, fcards, ftravs); | |
248 } else { | |
249 gclog_or_tty->print(" %4d-%4d %10.2f%10.2f", from, to-1, fcards, ftravs); | |
250 } | |
251 float pct_cards = fcards*100.0/f_tot_cards; | |
252 cum_card_pct += pct_cards; | |
253 float pct_travs = ftravs*100.0/f_tot_travs; | |
254 cum_travs_pct += pct_travs; | |
255 gclog_or_tty->print_cr("%10.2f%10.2f%10.2f%10.2f", | |
256 pct_cards, cum_card_pct, | |
257 pct_travs, cum_travs_pct); | |
258 } | |
259 } | |
260 | |
261 void ConcurrentG1Refine::print_final_card_counts() { | |
262 if (!G1ConcRSCountTraversals) return; | |
263 | |
264 gclog_or_tty->print_cr("Did %d total traversals of %d distinct cards.", | |
265 _total_travs, _total_cards); | |
266 float fperiods = (float)_n_periods; | |
267 gclog_or_tty->print_cr(" This is an average of %8.2f traversals, %8.2f cards, " | |
268 "per collection.", (float)_total_travs/fperiods, | |
269 (float)_total_cards/fperiods); | |
270 gclog_or_tty->print_cr(" This is an average of %8.2f traversals/distinct " | |
271 "dirty card.\n", | |
272 _total_cards > 0 ? | |
273 (float)_total_travs/(float)_total_cards : 0.0); | |
274 | |
275 | |
276 gclog_or_tty->print_cr("Histogram:\n\n%10s %10s%10s%10s%10s%10s%10s", | |
277 "range", "# cards", "# travs", "% cards", "(cum)", | |
278 "% travs", "(cum)"); | |
279 gclog_or_tty->print_cr("------------------------------------------------------------" | |
280 "-------------"); | |
281 float cum_cards_pct = 0.0; | |
282 float cum_travs_pct = 0.0; | |
283 for (int i = 1; i < 10; i++) { | |
284 print_card_count_histo_range(_cum_card_count_histo, i, i+1, | |
285 cum_cards_pct, cum_travs_pct); | |
286 } | |
287 for (int i = 10; i < 100; i += 10) { | |
288 print_card_count_histo_range(_cum_card_count_histo, i, i+10, | |
289 cum_cards_pct, cum_travs_pct); | |
290 } | |
291 print_card_count_histo_range(_cum_card_count_histo, 100, 150, | |
292 cum_cards_pct, cum_travs_pct); | |
293 print_card_count_histo_range(_cum_card_count_histo, 150, 200, | |
294 cum_cards_pct, cum_travs_pct); | |
295 print_card_count_histo_range(_cum_card_count_histo, 150, 255, | |
296 cum_cards_pct, cum_travs_pct); | |
297 print_card_count_histo_range(_cum_card_count_histo, 255, 256, | |
298 cum_cards_pct, cum_travs_pct); | |
299 } |