annotate src/share/vm/gc_implementation/g1/concurrentMark.cpp @ 453:c96030fff130

6684579: SoftReference processing can be made more efficient
Summary: For the current soft-ref clearing policies, we can decide at marking time whether a soft reference will definitely not be cleared, postponing the decision of whether it definitely will be cleared to the final reference-processing phase. This is especially beneficial for concurrent collectors, where marking is usually concurrent but reference processing usually is not.
Reviewed-by: jmasa

author:   ysr
date:     Thu, 20 Nov 2008 16:56:09 -0800
parents:  0edda524b58c
children: 27a80744a83b
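In the excerpt below, the only line attributed to rev 453 is the rp->snap_policy(false) call in checkpointRootsInitialPost(). As a minimal illustrative sketch of the idea behind the change -- the types and names here are hypothetical, not the HotSpot API -- snapshotting the clearing policy at the start of the cycle lets the marker treat soft referents that the policy would certainly keep as strongly reachable while marking runs concurrently, deferring only the clearing decision to the stop-the-world reference-processing phase:

  // Illustrative sketch only; hypothetical types, not HotSpot code.
  struct SoftRefClearingSnapshot {
    jlong clock_at_mark_start;  // soft-ref clock sampled when marking begins
    jlong max_idle_ms;          // clearing threshold the policy would apply
    // True if the snapshotted policy already guarantees this reference will
    // NOT be cleared this cycle; its referent can be marked through now.
    bool definitely_kept(jlong last_access_ms) const {
      return (clock_at_mark_start - last_access_ms) < max_idle_ms;
    }
  };
  // Concurrent marking:  if (snap.definitely_kept(ts)) mark_through(referent);
  // Final (STW) ref processing: decide clearing for the remaining references.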
/*
 * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_concurrentMark.cpp.incl"

//
// CMS Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CMS bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
}

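// Lock-free push: claim a slot by advancing _index with a CAS, then fill
// the claimed slot in.  On overflow the entry is dropped and only the
// _overflow flag is set; the caller is expected to notice the flag and
// recover (typically by restarting marking), so nothing is lost silently.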
void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically.  We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}


void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    guarantee(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}


bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}


CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
}

void CMRegionStack::push(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

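// Lock-free pop, the counterpart of push() above: retreat _index with a CAS
// and read the claimed entry.  Entries cleared by
// invalidate_entries_into_cset() are left in place as empty MemRegions, so
// pop() skips over them and keeps trying.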
MemRegion CMRegionStack::pop() {
  while (true) {
    // Otherwise...
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        tmp_guarantee_CM( mr.end() != NULL, "invariant" );
        tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        tmp_guarantee_CM( mr.end() == NULL, "invariant" );
      }
    }
    // Otherwise, we need to try again.
  }
}

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      tmp_guarantee_CM( mr.end() != NULL, "invariant");
      tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      tmp_guarantee_CM( hr != NULL, "invariant" );
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      tmp_guarantee_CM( mr.end() == NULL, "invariant" );
    }
  }
  return result;
}

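// Drain the stack by repeatedly popping a grey object and applying the
// closure to the references it contains.  With yield_after set, the drain
// offers to yield after each object; the return value reports whether the
// stack was drained to completion without yielding.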
template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in CMS generation.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false; break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),
  _cleanup_co_tracker(G1CLGroup)
{
  CMVerboseLevel verbose_level =
    (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose)
    verbose_level = no_verbose;
  if (verbose_level > high_verbose)
    verbose_level = high_verbose;
  _verbose_level = verbose_level;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);

  _markStack.allocate(G1CMStackSize);
  _regionStack.allocate(G1CMRegionStackSize);

  // Create & start a ConcurrentMark thread.
  if (G1ConcMark) {
    _cmThread = new ConcurrentMarkThread(this);
    assert(cmThread() != NULL, "CM Thread should have been created");
    assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  } else {
    _cmThread = NULL;
  }
  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBLogBufferSize);

  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
  for (int i = 0 ; i < size; i++) {
    _par_cleanup_thread_state[i] = new ParCleanupThreadState;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ParallelMarkingThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ParallelMarkingThreads > 0) {
      // notice that ParallelMarkingThreads overrides G1MarkingOverheadPerc
      // if both are set

      _parallel_marking_threads = ParallelMarkingThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPerc > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPerc / 100.0;
      double overall_cm_overhead =
        (double) G1MaxPauseTimeMS * marking_overhead / (double) G1TimeSliceMS;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
                                                (double) os::processor_count();
      double sleep_factor =
                         (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }
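    // Worked example for the G1MarkingOverheadPerc branch above
    // (illustrative numbers, not defaults): with 8 processors,
    // G1MarkingOverheadPerc=10, G1MaxPauseTimeMS=200, G1TimeSliceMS=500:
    //   overall_cm_overhead = 200 * 0.10 / 500 = 0.04 of total CPU time,
    //   cpu_ratio = 1/8 = 0.125, marking_thread_num = ceil(0.32) = 1,
    //   marking_task_overhead = 0.04 / 1 * 8 = 0.32, and
    //   sleep_factor = (1 - 0.32) / 0.32 ~= 2.1,
    // i.e. one marking thread that sleeps about twice as long as it works.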

    if (parallel_marking_threads() > 1)
      _cleanup_task_overhead = 1.0;
    else
      _cleanup_task_overhead = marking_task_overhead();
    _cleanup_sleep_factor =
                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee( parallel_marking_threads() > 0, "peace of mind" );
    _parallel_workers = new WorkGang("Parallel Marking Threads",
                                     (int) parallel_marking_threads(), false, true);
    if (_parallel_workers == NULL)
      vm_exit_during_initialization("Failed necessary allocation.");
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we avoid
  // this problem.
  if (!concurrent_marking_in_progress() && !force)
    return;

  MemRegion committed = _g1h->g1_committed();
  tmp_guarantee_CM( committed.start() == _heap_start,
                    "start shouldn't change" );
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently), in which case the entire
  // marking phase will bail out and the task will not be restarted.
  // So, let's do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  guarantee( _heap_start != NULL &&
             _heap_end != NULL &&
             _heap_start < _heap_end, "heap bounds should look ok" );

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low())
    gclog_or_tty->print_cr("[global] resetting");

  // We do reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->reset(_nextMarkBitMap);

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  guarantee( active_tasks <= _max_task_num, "we should not have more" );

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    guarantee( !concurrent_marking_in_progress(), "invariant" );
    guarantee( _finger == _heap_end, "only way to get here" );
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
  FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
                    _par_cleanup_thread_state);

  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
}

// This closure is used to mark refs into the g1 generation
// from external roots in the CMS bit map.
// Called at the first checkpoint.
//

#define PRINT_REACHABLE_AT_INITIAL_MARK 0
#if PRINT_REACHABLE_AT_INITIAL_MARK
static FILE* reachable_file = NULL;

class PrintReachableClosure: public OopsInGenClosure {
  CMBitMap* _bm;
  int _level;
public:
  PrintReachableClosure(CMBitMap* bm) :
    _bm(bm), _level(0) {
    guarantee(reachable_file != NULL, "pre-condition");
  }
  void do_oop(oop* p) {
    oop obj = *p;
    HeapWord* obj_addr = (HeapWord*)obj;
    if (obj == NULL) return;
    fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n",
            _level, p, (void*) obj, _bm->isMarked(obj_addr));
    if (!_bm->isMarked(obj_addr)) {
      _bm->mark(obj_addr);
      _level++;
      obj->oop_iterate(this);
      _level--;
    }
  }
};
#endif // PRINT_REACHABLE_AT_INITIAL_MARK

#define SEND_HEAP_DUMP_TO_FILE 0
#if SEND_HEAP_DUMP_TO_FILE
static FILE* heap_dump_file = NULL;
#endif // SEND_HEAP_DUMP_TO_FILE

void ConcurrentMark::clearNextBitmap() {
  guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end)
      next = end;
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();
  }
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Find all the reachable objects...
#if PRINT_REACHABLE_AT_INITIAL_MARK
  guarantee(reachable_file == NULL, "Protocol");
  char fn_buf[100];
  sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id());
  reachable_file = fopen(fn_buf, "w");
  // clear the mark bitmap (no grey objects to start with)
  _nextMarkBitMap->clearAll();
  PrintReachableClosure prcl(_nextMarkBitMap);
  g1h->process_strong_roots(
                            false,   // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &prcl, // Regular roots
                            &prcl    // Perm Gen Roots
                            );
  // The root iteration above "consumed" dirty cards in the perm gen.
  // Therefore, as a shortcut, we dirty all such cards.
  g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false);
  fclose(reachable_file);
  reachable_file = NULL;
  // clear the mark bitmap again.
  _nextMarkBitMap->clearAll();
  COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
  COMPILER2_PRESENT(DerivedPointerTable::clear());
#endif // PRINT_REACHABLE_AT_INITIAL_MARK

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}

class CMMarkRootsClosure: public OopsInGenClosure {
private:
  ConcurrentMark*  _cm;
  G1CollectedHeap* _g1h;
  bool             _do_barrier;

public:
  CMMarkRootsClosure(ConcurrentMark* cm,
                     G1CollectedHeap* g1h,
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

  virtual void do_oop(narrowOop* p) {
    guarantee(false, "NYI");
  }

  virtual void do_oop(oop* p) {
    oop thisOop = *p;
    if (thisOop != NULL) {
      assert(thisOop->is_oop() || thisOop->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
      HeapWord* addr = (HeapWord*)thisOop;
      if (_g1h->is_in_g1_reserved(addr)) {
        _cm->grayRoot(thisOop);
      }
    }
    if (_do_barrier) {
      assert(!_g1h->is_in_g1_reserved(p),
             "Should be called on external roots");
      do_barrier(p);
    }
  }
};

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor();
  rp->verify_no_references_recorded();
  rp->enable_discovery(); // enable ("weak") refs discovery
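  // 6684579 (the change annotated here): snapshot the soft-ref clearing
  // policy up front, so that marking can already tell which soft
  // references will definitely not be cleared this cycle; only the
  // "definitely cleared" decision is left to final reference processing.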
  rp->snap_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
  satb_mq_set.set_active_all_threads(true);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.

  guarantee( !_cleanup_co_tracker.enabled(), "invariant" );

  size_t max_marking_threads =
    MAX2((size_t) 1, parallel_marking_threads());
  for (int i = 0; i < (int)_max_task_num; ++i) {
    _tasks[i]->enable_co_tracker();
    if (i < (int) max_marking_threads)
      _tasks[i]->reset_co_tracker(marking_task_overhead());
    else
      _tasks[i]->reset_co_tracker(0.0);
  }
}

// Checkpoint the roots into this generation from outside
// this generation. [Note this initial checkpoint need only
// be approximate -- we'll do a catch-up phase subsequently.]
void ConcurrentMark::checkpointRootsInitial() {
  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  double start = os::elapsedTime();
  GCOverheadReporter::recordSTWStart(start);

  // If there has not been a GC[n-1] since last GC[n] cycle completed,
  // precede our marking with a collection of all
  // younger generations to keep floating garbage to a minimum.
  // YSR: we won't do this for now -- it's an optimization to be
  // done post-beta.

  // YSR: ignoring weak refs for now; will do at bug fixing stage
  // EVM: assert(discoveredRefsAreClear());


  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_init_start();
  checkpointRootsInitialPre();

  // YSR: when concurrent precleaning is in place, we'll
  // need to clear the cached card table here

  ResourceMark rm;
  HandleMark   hm;

  g1h->ensure_parsability(false);
  g1h->perm_gen()->save_marks();

  CMMarkRootsClosure notOlder(this, g1h, false);
  CMMarkRootsClosure older(this, g1h, true);

  g1h->set_marking_started();
  g1h->rem_set()->prepare_for_younger_refs_iterate(false);

  g1h->process_strong_roots(false,   // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &notOlder, // Regular roots
                            &older    // Perm Gen Roots
                            );
  checkpointRootsInitialPost();

  // Statistics.
  double end = os::elapsedTime();
  _init_times.add((end - start) * 1000.0);
  GCOverheadReporter::recordSTWEnd(end);

  g1p->record_concurrent_mark_init_end();
}

/*
   Notice that in the next two methods, we actually leave the STS
   during the barrier sync and join it immediately afterwards. If we
   do not do this, then the following deadlock can occur: one thread
   could be in the barrier sync code, waiting for the other thread to
   also sync up, whereas another one could be trying to yield, while
   also waiting for the other threads to sync up too.

   Because the thread that does the sync barrier has left the STS, it
   is possible for it to be suspended while a Full GC or an evacuation
   pause occurs. This is actually safe, since entering the sync
   barrier is one of the last things do_marking_step() does, and it
   doesn't manipulate any data structures afterwards.
*/

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _first_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    clear_marking_state();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _second_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everything should be re-initialised and ready to go

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
}

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
                    "address should be within the heap bounds" );

  if (!_nextMarkBitMap->isMarked(addr))
    _nextMarkBitMap->parMark(addr);
}

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects)
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;

  HeapWord* finger = _finger;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    tmp_guarantee_CM( mr.end() <= finger, "invariant" );

    tmp_guarantee_CM( mr.start() <= mr.end() &&
                      _heap_start <= mr.start() &&
                      mr.end() <= _heap_end,
                  "region boundaries should fall within the committed space" );
    if (verbose_low())
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());

    if (!region_stack_push(mr)) {
      if (verbose_low())
        gclog_or_tty->print_cr("[global] region stack has overflown.");
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail out
    // because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects)
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;

      // No OrderAccess::store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low())
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
        }
      }
    }
  }
}

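// Gang task run by the parallel marking threads: each worker repeatedly
// invokes do_marking_step() with a 10ms target and, between aborted steps,
// leaves the STS and sleeps for sleep_factor times the virtual time the
// step just consumed, enforcing the configured marking overhead.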
class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    guarantee( Thread::current()->is_ConcurrentGC_thread(),
               "this should only be done by a conc GC thread" );

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
    CMTask* the_task = _cm->task(worker_i);
    the_task->start_co_tracker();
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        the_task->do_marking_step(10.0);
        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

        the_task->update_co_tracker();

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    the_task->update_co_tracker(true);
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0)
    _parallel_workers->run_task(&markingTask);
  else
    markingTask.work(0);
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();
  GCOverheadReporter::recordSTWStart(start);

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops.  We overflowed.  Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow)
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  } else {
    // We're done with marking.
    JavaThread::satb_mark_queue_set().set_active_all_threads(false);
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  GCOverheadReporter::recordSTWEnd(now);
  for (int i = 0; i < (int)_max_task_num; ++i)
    _tasks[i]->disable_co_tracker();
  _cleanup_co_tracker.enable();
  _cleanup_co_tracker.reset(cleanup_task_overhead());
  g1p->record_concurrent_mark_remark_end();
}


#define CARD_BM_TEST_MODE 0

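// Walks the heap regions and, using the mark bitmap, computes the number of
// live bytes in each region (recorded via add_to_marked_bytes()) while
// populating the region and card liveness bitmaps used by cleanup.  In the
// non-final (concurrent) mode it periodically yields and throttles itself;
// in final mode it must run to completion without yielding.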
class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  COTracker* _co_tracker;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num),
                "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm,
                         COTracker* co_tracker) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm),
    _final(final), _co_tracker(co_tracker),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (_co_tracker != NULL)
      _co_tracker->update();

    if (!_final && _regions_done == 0)
      _start_vtime_sec = os::elapsedVTime();

    if (hr->continuesHumongous()) return false;

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0.  One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
    // The first card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded.  It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set.  We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1)
      mark_card_num_range(start_card_num, last_card_num);
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        _region_bm->par_at_put(hr->hrs_index(), 1);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      _region_bm->par_at_put(hr->hrs_index(), 1);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
#if 0
          gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, "
                                 "overhead %1.4lf",
                                 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                                 _co_tracker->concOverhead(os::elapsedTime()));
#endif
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset()   { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};


void ConcurrentMark::calcDesiredRegions() {
  guarantee( _cleanup_co_tracker.enabled(), "invariant" );
  _cleanup_co_tracker.start();

  _region_bm.clear();
  _card_bm.clear();
  CalcLiveObjectsClosure calccl(false /*final*/,
                                nextMarkBitMap(), this,
                                &_region_bm, &_card_bm,
                                &_cleanup_co_tracker);
  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  g1h->heap_region_iterate(&calccl);

  do {
    calccl.reset();
    g1h->heap_region_iterate(&calccl);
  } while (calccl.changed());

  _cleanup_co_tracker.update(true);
}

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 final counting"), _g1h(g1h),
    _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  {
    if (ParallelGCThreads > 0)
      _n_workers = _g1h->workers()->total_workers();
    else
      _n_workers = 1;
    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {
    CalcLiveObjectsClosure calccl(true /*final*/,
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm,
                                  NULL /* CO tracker */);
    calccl.no_yield();
    if (ParallelGCThreads > 0) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&calccl);
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

    guarantee( (size_t)i < _n_workers, "invariant" );
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
  size_t live_bytes() {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }
  size_t used_bytes() {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  size_t _regions_claimed;
  size_t _freed_bytes;
  size_t _cleared_h_regions;
  size_t _freed_regions;
  UncleanRegionList* _unclean_region_list;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             UncleanRegionList* list,
                             int worker_num);
  size_t freed_bytes() { return _freed_bytes; }
  size_t cleared_h_regions() { return _cleared_h_regions; }
  size_t freed_regions() { return _freed_regions; }
  UncleanRegionList* unclean_region_list() {
    return _unclean_region_list;
  }

  bool doHeapRegion(HeapRegion *r);

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;
protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   ConcurrentMark::ParCleanupThreadState**
                   par_cleanup_thread_state) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0),
    _par_cleanup_thread_state(par_cleanup_thread_state)
  {}

  void work(int i) {
    double start = os::elapsedTime();
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
                                           &_par_cleanup_thread_state[i]->list,
                                           i);
    if (ParallelGCThreads > 0) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now finish up freeing the current thread's regions.
    _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
                                  g1_note_end.cleared_h_regions(),
                                  0, NULL);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end-start)*1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec()*1000.0,
                          g1_note_end.max_region_time_sec()*1000.0);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm)
  {}

  void work(int i) {
    if (ParallelGCThreads > 0) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

G1NoteEndOfConcMarkClosure::
G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                           UncleanRegionList* list,
                           int worker_num)
  : _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _unclean_region_list(list)
1569 {} | |
1570 | |
1571 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) { | |
1572 // We use a claim value of zero here because all regions | |
1573 // were claimed with value 1 in the FinalCount task. | |
1574 r->reset_gc_time_stamp(); | |
1575 if (!r->continuesHumongous()) { | |
1576 double start = os::elapsedTime(); | |
1577 _regions_claimed++; | |
1578 r->note_end_of_marking(); | |
1579 _max_live_bytes += r->max_live_bytes(); | |
1580 _g1->free_region_if_totally_empty_work(r, | |
1581 _freed_bytes, | |
1582 _cleared_h_regions, | |
1583 _freed_regions, | |
1584 _unclean_region_list, | |
1585 true /*par*/); | |
1586 double region_time = (os::elapsedTime() - start); | |
1587 _claimed_region_time += region_time; | |
1588 if (region_time > _max_region_time) _max_region_time = region_time; | |
1589 } | |
1590 return false; | |
1591 } | |
1592 | |
1593 void ConcurrentMark::cleanup() { | |
1594 // world is stopped at this checkpoint | |
1595 assert(SafepointSynchronize::is_at_safepoint(), | |
1596 "world should be stopped"); | |
1597 G1CollectedHeap* g1h = G1CollectedHeap::heap(); | |
1598 | |
1599 // If a full collection has happened, we shouldn't do this. | |
1600 if (has_aborted()) { | |
1601 g1h->set_marking_complete(); // So bitmap clearing isn't confused | |
1602 return; | |
1603 } | |
1604 | |
1605 _cleanup_co_tracker.disable(); | |
1606 | |
1607 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); | |
1608 g1p->record_concurrent_mark_cleanup_start(); | |
1609 | |
1610 double start = os::elapsedTime(); | |
1611 GCOverheadReporter::recordSTWStart(start); | |
1612 | |
1613 // Do counting once more with the world stopped for good measure. | |
1614 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), | |
1615 &_region_bm, &_card_bm); | |
1616 if (ParallelGCThreads > 0) { | |
355 | 1617 assert(g1h->check_heap_region_claim_values( |
1618 HeapRegion::InitialClaimValue), | |
1619 "sanity check"); | |
1620 | |
342 | 1621 int n_workers = g1h->workers()->total_workers(); |
1622 g1h->set_par_threads(n_workers); | |
1623 g1h->workers()->run_task(&g1_par_count_task); | |
1624 g1h->set_par_threads(0); | |
355 | 1625 |
1626 assert(g1h->check_heap_region_claim_values( | |
1627 HeapRegion::FinalCountClaimValue), | |
1628 "sanity check"); | |
342 | 1629 } else { |
1630 g1_par_count_task.work(0); | |
1631 } | |
1632 | |
1633 size_t known_garbage_bytes = | |
1634 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes(); | |
1635 #if 0 | |
1636 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf", | |
1637 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024), | |
1638 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024), | |
1639 (double) known_garbage_bytes / (double) (1024 * 1024)); | |
1640 #endif // 0 | |
1641 g1p->set_known_garbage_bytes(known_garbage_bytes); | |
1642 | |
1643 size_t start_used_bytes = g1h->used(); | |
1644 _at_least_one_mark_complete = true; | |
1645 g1h->set_marking_complete(); | |
1646 | |
1647 double count_end = os::elapsedTime(); | |
1648 double this_final_counting_time = (count_end - start); | |
1649 if (G1PrintParCleanupStats) { | |
1650 gclog_or_tty->print_cr("Cleanup:"); | |
1651 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms", | |
1652 this_final_counting_time*1000.0); | |
1653 } | |
1654 _total_counting_time += this_final_counting_time; | |
1655 | |
1656 // Install newly created mark bitMap as "prev". | |
1657 swapMarkBitMaps(); | |
1658 | |
1659 g1h->reset_gc_time_stamp(); | |
1660 | |
1661 // Note end of marking in all heap regions. | |
1662 double note_end_start = os::elapsedTime(); | |
1663 G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state); | |
1664 if (ParallelGCThreads > 0) { | |
1665 int n_workers = g1h->workers()->total_workers(); | |
1666 g1h->set_par_threads(n_workers); | |
1667 g1h->workers()->run_task(&g1_par_note_end_task); | |
1668 g1h->set_par_threads(0); | |
355 | 1669 |
1670 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), | |
1671 "sanity check"); | |
342 | 1672 } else { |
1673 g1_par_note_end_task.work(0); | |
1674 } | |
1675 g1h->set_unclean_regions_coming(true); | |
1676 double note_end_end = os::elapsedTime(); | |
1677 // Tell the mutators that there might be unclean regions coming... | |
1678 if (G1PrintParCleanupStats) { | |
1679 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.", | |
1680 (note_end_end - note_end_start)*1000.0); | |
1681 } | |
1682 | |
355 | 1683 |
342 | 1684 // If enabled, RS scrubbing must precede the record_concurrent_mark_cleanup_end() |
1685 // call below, since it affects the metric by which we sort the heap regions. | |
1686 if (G1ScrubRemSets) { | |
1687 double rs_scrub_start = os::elapsedTime(); | |
1688 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); | |
1689 if (ParallelGCThreads > 0) { | |
1690 int n_workers = g1h->workers()->total_workers(); | |
1691 g1h->set_par_threads(n_workers); | |
1692 g1h->workers()->run_task(&g1_par_scrub_rs_task); | |
1693 g1h->set_par_threads(0); | |
355 | 1694 |
1695 assert(g1h->check_heap_region_claim_values( | |
1696 HeapRegion::ScrubRemSetClaimValue), | |
1697 "sanity check"); | |
342 | 1698 } else { |
1699 g1_par_scrub_rs_task.work(0); | |
1700 } | |
1701 | |
1702 double rs_scrub_end = os::elapsedTime(); | |
1703 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); | |
1704 _total_rs_scrub_time += this_rs_scrub_time; | |
1705 } | |
1706 | |
1707 // this will also free any regions totally full of garbage objects, | |
1708 // and sort the regions. | |
1709 g1h->g1_policy()->record_concurrent_mark_cleanup_end( | |
1710 g1_par_note_end_task.freed_bytes(), | |
1711 g1_par_note_end_task.max_live_bytes()); | |
1712 | |
1713 // Statistics. | |
1714 double end = os::elapsedTime(); | |
1715 _cleanup_times.add((end - start) * 1000.0); | |
1716 GCOverheadReporter::recordSTWEnd(end); | |
1717 | |
1718 // G1CollectedHeap::heap()->print(); | |
1719 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d", | |
1720 // G1CollectedHeap::heap()->get_gc_time_stamp()); | |
1721 | |
1722 if (PrintGC || PrintGCDetails) { | |
1723 g1h->print_size_transition(gclog_or_tty, | |
1724 start_used_bytes, | |
1725 g1h->used(), | |
1726 g1h->capacity()); | |
1727 } | |
1728 | |
1729 size_t cleaned_up_bytes = start_used_bytes - g1h->used(); | |
1730 g1p->decrease_known_garbage_bytes(cleaned_up_bytes); | |
1731 | |
1732 // We need to make this be a "collection" so any collection pause that | |
1733 // races with it goes around and waits for completeCleanup to finish. | |
1734 g1h->increment_total_collections(); | |
1735 | |
1736 #ifndef PRODUCT | |
1737 if (G1VerifyConcMark) { | |
1738 G1CollectedHeap::heap()->prepare_for_verify(); | |
1739 G1CollectedHeap::heap()->verify(true,false); | |
1740 } | |
1741 #endif | |
1742 } | |
1743 | |
1744 void ConcurrentMark::completeCleanup() { | |
1745 // A full collection intervened. | |
1746 if (has_aborted()) return; | |
1747 | |
1748 int first = 0; | |
1749 int last = (int)MAX2(ParallelGCThreads, (size_t)1); | |
1750 for (int t = 0; t < last; t++) { | |
1751 UncleanRegionList* list = &_par_cleanup_thread_state[t]->list; | |
1752 assert(list->well_formed(), "Inv"); | |
1753 HeapRegion* hd = list->hd(); | |
1754 while (hd != NULL) { | |
1755 // Now finish up the other stuff. | |
1756 hd->rem_set()->clear(); | |
1757 HeapRegion* next_hd = hd->next_from_unclean_list(); | |
1758 (void)list->pop(); | |
1759 guarantee(list->hd() == next_hd, "how not?"); | |
1760 _g1h->put_region_on_unclean_list(hd); | |
1761 if (!hd->isHumongous()) { | |
1762 // Bump the _free_regions count by 1. | |
1763 _g1h->finish_free_region_work(0, 0, 1, NULL); | |
1764 } | |
1765 hd = list->hd(); | |
1766 guarantee(hd == next_hd, "how not?"); | |
1767 } | |
1768 } | |
1769 } | |
1770 | |
1771 | |
1772 class G1CMIsAliveClosure: public BoolObjectClosure { | |
1773 G1CollectedHeap* _g1; | |
1774 public: | |
1775 G1CMIsAliveClosure(G1CollectedHeap* g1) : | |
1776 _g1(g1) | |
1777 {} | |
1778 | |
1779 void do_object(oop obj) { | |
1780 assert(false, "not to be invoked"); | |
1781 } | |
1782 bool do_object_b(oop obj) { | |
1783 HeapWord* addr = (HeapWord*)obj; | |
1784 return addr != NULL && | |
1785 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); | |
1786 } | |
1787 }; | |
1788 | |
1789 class G1CMKeepAliveClosure: public OopClosure { | |
1790 G1CollectedHeap* _g1; | |
1791 ConcurrentMark* _cm; | |
1792 CMBitMap* _bitMap; | |
1793 public: | |
1794 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm, | |
1795 CMBitMap* bitMap) : | |
1796 _g1(g1), _cm(cm), | |
1797 _bitMap(bitMap) {} | |
1798 | |
1799 void do_oop(narrowOop* p) { | |
1800 guarantee(false, "NYI"); | |
1801 } | |
1802 | |
1803 void do_oop(oop* p) { | |
1804 oop thisOop = *p; | |
1805 HeapWord* addr = (HeapWord*)thisOop; | |
1806 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) { | |
1807 _bitMap->mark(addr); | |
1808 _cm->mark_stack_push(thisOop); | |
1809 } | |
1810 } | |
1811 }; | |
1812 | |
1813 class G1CMDrainMarkingStackClosure: public VoidClosure { | |
1814 CMMarkStack* _markStack; | |
1815 CMBitMap* _bitMap; | |
1816 G1CMKeepAliveClosure* _oopClosure; | |
1817 public: | |
1818 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack, | |
1819 G1CMKeepAliveClosure* oopClosure) : | |
1820 _bitMap(bitMap), | |
1821 _markStack(markStack), | |
1822 _oopClosure(oopClosure) | |
1823 {} | |
1824 | |
1825 void do_void() { | |
1826 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false); | |
1827 } | |
1828 }; | |
1829 | |
1830 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { | |
1831 ResourceMark rm; | |
1832 HandleMark hm; | |
453 | 1833 G1CollectedHeap* g1h = G1CollectedHeap::heap(); |
1834 ReferenceProcessor* rp = g1h->ref_processor(); | |
342 | 1835 |
1836 // Process weak references. | |
453 | 1837 rp->snap_policy(clear_all_soft_refs); |
342 | 1838 assert(_markStack.isEmpty(), "mark stack should be empty"); |
1839 | |
453 | 1840 G1CMIsAliveClosure g1IsAliveClosure (g1h); |
1841 G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap()); | |
342 | 1842 G1CMDrainMarkingStackClosure |
1843 g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack, | |
1844 &g1KeepAliveClosure); | |
1845 | |
1846 // XXXYYY Also: copy the parallel ref processing code from CMS. | |
453 | 1847 rp->process_discovered_references(&g1IsAliveClosure, |
342 | 1848 &g1KeepAliveClosure, |
1849 &g1DrainMarkingStackClosure, | |
1850 NULL); | |
1851 assert(_markStack.overflow() || _markStack.isEmpty(), | |
1852 "mark stack should be empty (unless it overflowed)"); | |
1853 if (_markStack.overflow()) { | |
1854 set_has_overflown(); | |
1855 } | |
1856 | |
1857 rp->enqueue_discovered_references(); | |
1858 rp->verify_no_references_recorded(); | |
1859 assert(!rp->discovery_enabled(), "should have been disabled"); | |
1860 | |
1861 // Now clean up stale oops in SymbolTable and StringTable | |
1862 SymbolTable::unlink(&g1IsAliveClosure); | |
1863 StringTable::unlink(&g1IsAliveClosure); | |
1864 } | |
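// weakRefsWork() hands the reference processor three callbacks: an
// is-alive predicate over the mark bitmap, a keep-alive closure that
// marks and pushes a referent, and a drain closure that empties the
// mark stack the keep-alive step fills. A greatly simplified,
// stand-alone sketch of that contract (hypothetical types; the real
// per-reference-kind policies are omitted):

#include <functional>
#include <vector>

struct RefProcessorSketch {
  std::vector<void**> discovered;  // referent slots found during marking

  void process(bool clear_all,
               const std::function<bool(void*)>& is_alive,
               const std::function<void(void**)>& keep_alive,
               const std::function<void()>& drain) {
    for (void** slot : discovered) {
      void* referent = *slot;
      if (referent == NULL || is_alive(referent))
        continue;                  // empty, or already reachable: no work
      if (clear_all) {
        *slot = NULL;              // policy says clear the dead referent
      } else {
        keep_alive(slot);          // policy says keep: mark through it
      }
    }
    drain();                       // trace whatever keep_alive grayed
    discovered.clear();
  }
};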
1865 | |
1866 void ConcurrentMark::swapMarkBitMaps() { | |
1867 CMBitMapRO* temp = _prevMarkBitMap; | |
1868 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; | |
1869 _nextMarkBitMap = (CMBitMap*) temp; | |
1870 } | |
1871 | |
1872 class CMRemarkTask: public AbstractGangTask { | |
1873 private: | |
1874 ConcurrentMark *_cm; | |
1875 | |
1876 public: | |
1877 void work(int worker_i) { | |
1878 // Since all available tasks are actually started, we should | |
1879 // only proceed if we're supposed to be active. | |
1880 if ((size_t)worker_i < _cm->active_tasks()) { | |
1881 CMTask* task = _cm->task(worker_i); | |
1882 task->record_start_time(); | |
1883 do { | |
1884 task->do_marking_step(1000000000.0 /* something very large */); | |
1885 } while (task->has_aborted() && !_cm->has_overflown()); | |
1886 // If we overflow, then we do not want to restart. We instead | |
1887 // want to abort remark and do concurrent marking again. | |
1888 task->record_end_time(); | |
1889 } | |
1890 } | |
1891 | |
1892 CMRemarkTask(ConcurrentMark* cm) : | |
1893 AbstractGangTask("Par Remark"), _cm(cm) { } | |
1894 }; | |
1895 | |
1896 void ConcurrentMark::checkpointRootsFinalWork() { | |
1897 ResourceMark rm; | |
1898 HandleMark hm; | |
1899 G1CollectedHeap* g1h = G1CollectedHeap::heap(); | |
1900 | |
1901 g1h->ensure_parsability(false); | |
1902 | |
1903 if (ParallelGCThreads > 0) { | |
1904 g1h->change_strong_roots_parity(); | |
1905 // this is remark, so we'll use up all available threads | |
1906 int active_workers = ParallelGCThreads; | |
1907 set_phase(active_workers, false); | |
1908 | |
1909 CMRemarkTask remarkTask(this); | |
1910 // We will start all available threads, even if we decide that the | |
1911 // active_workers will be fewer. The extra ones will just bail out | |
1912 // immediately. | |
1913 int n_workers = g1h->workers()->total_workers(); | |
1914 g1h->set_par_threads(n_workers); | |
1915 g1h->workers()->run_task(&remarkTask); | |
1916 g1h->set_par_threads(0); | |
1917 | |
1918 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); | |
1919 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); | |
1920 } else { | |
1921 g1h->change_strong_roots_parity(); | |
1922 // this is remark, so we'll use up all available threads | |
1923 int active_workers = 1; | |
1924 set_phase(active_workers, false); | |
1925 | |
1926 CMRemarkTask remarkTask(this); | |
1927 // We will start all available threads, even if we decide that the | |
1928 // active_workers will be fewer. The extra ones will just bail out | |
1929 // immediately. | |
1930 remarkTask.work(0); | |
1931 | |
1932 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); | |
1933 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); | |
1934 } | |
1935 | |
1936 print_stats(); | |
1937 | |
1938 if (!restart_for_overflow()) | |
1939 set_non_marking_state(); | |
1940 | |
1941 #if VERIFY_OBJS_PROCESSED | |
1942 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { | |
1943 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", | |
1944 _scan_obj_cl.objs_processed, | |
1945 ThreadLocalObjQueue::objs_enqueued); | |
1946 guarantee(_scan_obj_cl.objs_processed == | |
1947 ThreadLocalObjQueue::objs_enqueued, | |
1948 "Different number of objs processed and enqueued."); | |
1949 } | |
1950 #endif | |
1951 } | |
1952 | |
1953 class ReachablePrinterOopClosure: public OopClosure { | |
1954 private: | |
1955 G1CollectedHeap* _g1h; | |
1956 CMBitMapRO* _bitmap; | |
1957 outputStream* _out; | |
1958 | |
1959 public: | |
1960 ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) : | |
1961 _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { } | |
1962 | |
1963 void do_oop(narrowOop* p) { | |
1964 guarantee(false, "NYI"); | |
1965 } | |
1966 | |
1967 void do_oop(oop* p) { | |
1968 oop obj = *p; | |
1969 const char* str = NULL; | |
1970 const char* str2 = ""; | |
1971 | |
1972 if (!_g1h->is_in_g1_reserved(obj)) | |
1973 str = "outside G1 reserved"; | |
1974 else { | |
1975 HeapRegion* hr = _g1h->heap_region_containing(obj); | |
1976 guarantee( hr != NULL, "invariant" ); | |
1977 if (hr->obj_allocated_since_prev_marking(obj)) { | |
1978 str = "over TAMS"; | |
1979 if (_bitmap->isMarked((HeapWord*) obj)) | |
1980 str2 = " AND MARKED"; | |
1981 } else if (_bitmap->isMarked((HeapWord*) obj)) | |
1982 str = "marked"; | |
1983 else | |
1984 str = "#### NOT MARKED ####"; | |
1985 } | |
1986 | |
1987 _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s", | |
1988 p, (void*) obj, str, str2); | |
1989 } | |
1990 }; | |
1991 | |
1992 class ReachablePrinterClosure: public BitMapClosure { | |
1993 private: | |
1994 CMBitMapRO* _bitmap; | |
1995 outputStream* _out; | |
1996 | |
1997 public: | |
1998 ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) : | |
1999 _bitmap(bitmap), _out(out) { } | |
2000 | |
2001 bool do_bit(size_t offset) { | |
2002 HeapWord* addr = _bitmap->offsetToHeapWord(offset); | |
2003 ReachablePrinterOopClosure oopCl(_bitmap, _out); | |
2004 | |
2005 _out->print_cr(" obj "PTR_FORMAT", offset %10d (marked)", addr, offset); | |
2006 oop(addr)->oop_iterate(&oopCl); | |
2007 _out->print_cr(""); | |
2008 | |
2009 return true; | |
2010 } | |
2011 }; | |
2012 | |
2013 class ObjInRegionReachablePrinterClosure : public ObjectClosure { | |
2014 private: | |
2015 CMBitMapRO* _bitmap; | |
2016 outputStream* _out; | |
2017 | |
2018 public: | |
2019 void do_object(oop o) { | |
2020 ReachablePrinterOopClosure oopCl(_bitmap, _out); | |
2021 | |
2022 _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o); | |
2023 o->oop_iterate(&oopCl); | |
2024 _out->print_cr(""); | |
2025 } | |
2026 | |
2027 ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) : | |
2028 _bitmap(bitmap), _out(out) { } | |
2029 }; | |
2030 | |
2031 class RegionReachablePrinterClosure : public HeapRegionClosure { | |
2032 private: | |
2033 CMBitMapRO* _bitmap; | |
2034 outputStream* _out; | |
2035 | |
2036 public: | |
2037 bool doHeapRegion(HeapRegion* hr) { | |
2038 HeapWord* b = hr->bottom(); | |
2039 HeapWord* e = hr->end(); | |
2040 HeapWord* t = hr->top(); | |
2041 HeapWord* p = hr->prev_top_at_mark_start(); | |
2042 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " | |
2043 "PTAMS: "PTR_FORMAT, b, e, t, p); | |
2044 _out->print_cr(""); | |
2045 | |
2046 ObjInRegionReachablePrinterClosure ocl(_bitmap, _out); | |
2047 hr->object_iterate_mem_careful(MemRegion(p, t), &ocl); | |
2048 | |
2049 return false; | |
2050 } | |
2051 | |
2052 RegionReachablePrinterClosure(CMBitMapRO* bitmap, | |
2053 outputStream* out) : | |
2054 _bitmap(bitmap), _out(out) { } | |
2055 }; | |
2056 | |
2057 void ConcurrentMark::print_prev_bitmap_reachable() { | |
2058 outputStream* out = gclog_or_tty; | |
2059 | |
2060 #if SEND_HEAP_DUMP_TO_FILE | |
2061 guarantee(heap_dump_file == NULL, "Protocol"); | |
2062 char fn_buf[100]; | |
2063 sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id()); | |
2064 heap_dump_file = fopen(fn_buf, "w"); | |
2065 fileStream fstream(heap_dump_file); | |
2066 out = &fstream; | |
2067 #endif // SEND_HEAP_DUMP_TO_FILE | |
2068 | |
2069 RegionReachablePrinterClosure rcl(_prevMarkBitMap, out); | |
2070 out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP"); | |
2071 _g1h->heap_region_iterate(&rcl); | |
2072 out->print_cr(""); | |
2073 | |
2074 ReachablePrinterClosure cl(_prevMarkBitMap, out); | |
2075 out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP"); | |
2076 _prevMarkBitMap->iterate(&cl); | |
2077 out->print_cr(""); | |
2078 | |
2079 #if SEND_HEAP_DUMP_TO_FILE | |
2080 fclose(heap_dump_file); | |
2081 heap_dump_file = NULL; | |
2082 #endif // SEND_HEAP_DUMP_TO_FILE | |
2083 } | |
2084 | |
2085 // This note is for drainAllSATBBuffers and the code in between. | |
2086 // In the future we could reuse a task to do this work during an | |
2087 // evacuation pause (since now tasks are not active and can be claimed | |
2088 // during an evacuation pause). This was a late change to the code and | |
2089 // is currently not being taken advantage of. | |
2090 | |
2091 class CMGlobalObjectClosure : public ObjectClosure { | |
2092 private: | |
2093 ConcurrentMark* _cm; | |
2094 | |
2095 public: | |
2096 void do_object(oop obj) { | |
2097 _cm->deal_with_reference(obj); | |
2098 } | |
2099 | |
2100 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { } | |
2101 }; | |
2102 | |
2103 void ConcurrentMark::deal_with_reference(oop obj) { | |
2104 if (verbose_high()) | |
2105 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT, | |
2106 (void*) obj); | |
2107 | |
2108 | |
2109 HeapWord* objAddr = (HeapWord*) obj; | |
2110 if (_g1h->is_in_g1_reserved(objAddr)) { | |
2111 tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" ); | |
2112 HeapRegion* hr = _g1h->heap_region_containing(obj); | |
2113 if (_g1h->is_obj_ill(obj, hr)) { | |
2114 if (verbose_high()) | |
2115 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered " | |
2116 "marked", (void*) obj); | |
2117 | |
2118 // we need to mark it first | |
2119 if (_nextMarkBitMap->parMark(objAddr)) { | |
2120 // No OrderAccess::store_load() is needed. It is implicit in the | |
2121 // CAS done in parMark(objAddr) above | |
2122 HeapWord* finger = _finger; | |
2123 if (objAddr < finger) { | |
2124 if (verbose_high()) | |
2125 gclog_or_tty->print_cr("[global] below the global finger " | |
2126 "("PTR_FORMAT"), pushing it", finger); | |
2127 if (!mark_stack_push(obj)) { | |
2128 if (verbose_low()) | |
2129 gclog_or_tty->print_cr("[global] global stack overflow during " | |
2130 "deal_with_reference"); | |
2131 } | |
2132 } | |
2133 } | |
2134 } | |
2135 } | |
2136 } | |
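// The code above hinges on parMark() being an atomic test-and-set:
// exactly one thread wins the race for an object's mark bit, and only
// the winner goes on to push the object. A minimal sketch of that
// idiom with std::atomic (one bitmap word shown; illustrative, not
// HotSpot's BitMap):

#include <atomic>
#include <stdint.h>

// Returns true iff this call flipped the bit from 0 to 1, i.e. we are
// the unique marker and therefore responsible for graying the object.
inline bool par_mark(std::atomic<uint64_t>& word, unsigned bit) {
  const uint64_t mask = uint64_t(1) << bit;
  uint64_t old = word.load(std::memory_order_relaxed);
  do {
    if (old & mask) return false;          // somebody else marked it first
  } while (!word.compare_exchange_weak(old, old | mask,
                                       std::memory_order_acq_rel,
                                       std::memory_order_relaxed));
  return true;
}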
2137 | |
2138 void ConcurrentMark::drainAllSATBBuffers() { | |
2139 CMGlobalObjectClosure oc(this); | |
2140 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); | |
2141 satb_mq_set.set_closure(&oc); | |
2142 | |
2143 while (satb_mq_set.apply_closure_to_completed_buffer()) { | |
2144 if (verbose_medium()) | |
2145 gclog_or_tty->print_cr("[global] processed an SATB buffer"); | |
2146 } | |
2147 | |
2148 // no need to check whether we should do this, as this is only | |
2149 // called during an evacuation pause | |
2150 satb_mq_set.iterate_closure_all_threads(); | |
2151 | |
2152 satb_mq_set.set_closure(NULL); | |
2153 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); | |
2154 } | |
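// drainAllSATBBuffers() is a two-phase drain: first every buffer the
// mutators have already completed and handed over, then whatever is
// still sitting, partially filled, in each thread's current buffer.
// A stand-alone sketch of that shape (hypothetical types):

#include <cstddef>
#include <functional>
#include <vector>

struct SATBBufferSetSketch {
  std::vector<std::vector<void*> > completed;    // full, handed-over buffers
  std::vector<std::vector<void*>*> per_thread;   // current mutator buffers

  void drain_all(const std::function<void(void*)>& deal_with) {
    // Phase 1: the analogue of apply_closure_to_completed_buffer().
    for (size_t i = 0; i < completed.size(); ++i)
      for (size_t j = 0; j < completed[i].size(); ++j)
        deal_with(completed[i][j]);
    completed.clear();
    // Phase 2: the analogue of iterate_closure_all_threads(); safe
    // here only because the world is stopped, as the comment above notes.
    for (size_t t = 0; t < per_thread.size(); ++t) {
      for (size_t j = 0; j < per_thread[t]->size(); ++j)
        deal_with((*per_thread[t])[j]);
      per_thread[t]->clear();
    }
  }
};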
2155 | |
2156 void ConcurrentMark::markPrev(oop p) { | |
2157 // Note we are overriding the read-only view of the prev map here, via | |
2158 // the cast. | |
2159 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p); | |
2160 } | |
2161 | |
2162 void ConcurrentMark::clear(oop p) { | |
2163 assert(p != NULL && p->is_oop(), "expected an oop"); | |
2164 HeapWord* addr = (HeapWord*)p; | |
2165 assert(addr >= _nextMarkBitMap->startWord() && | |
2166 addr < _nextMarkBitMap->endWord(), "in a region"); | |
2167 | |
2168 _nextMarkBitMap->clear(addr); | |
2169 } | |
2170 | |
2171 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) { | |
2172 // Note we are overriding the read-only view of the prev map here, via | |
2173 // the cast. | |
2174 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); | |
2175 _nextMarkBitMap->clearRange(mr); | |
2176 } | |
2177 | |
2178 HeapRegion* | |
2179 ConcurrentMark::claim_region(int task_num) { | |
2180 // "checkpoint" the finger | |
2181 HeapWord* finger = _finger; | |
2182 | |
2183 // _heap_end will not change underneath our feet; it only changes at | |
2184 // yield points. | |
2185 while (finger < _heap_end) { | |
2186 tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" ); | |
2187 | |
2188 // is the gap between reading the finger and doing the CAS too long? | |
2189 | |
2190 HeapRegion* curr_region = _g1h->heap_region_containing(finger); | |
2191 HeapWord* bottom = curr_region->bottom(); | |
2192 HeapWord* end = curr_region->end(); | |
2193 HeapWord* limit = curr_region->next_top_at_mark_start(); | |
2194 | |
2195 if (verbose_low()) | |
2196 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" " | |
2197 "["PTR_FORMAT", "PTR_FORMAT"), " | |
2198 "limit = "PTR_FORMAT, | |
2199 task_num, curr_region, bottom, end, limit); | |
2200 | |
2201 HeapWord* res = | |
2202 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); | |
2203 if (res == finger) { | |
2204 // we succeeded | |
2205 | |
2206 // notice that _finger == end cannot be guaranteed here since, | |
2207 // someone else might have moved the finger even further | |
2208 guarantee( _finger >= end, "the finger should have moved forward" ); | |
2209 | |
2210 if (verbose_low()) | |
2211 gclog_or_tty->print_cr("[%d] we were successful with region = " | |
2212 PTR_FORMAT, task_num, curr_region); | |
2213 | |
2214 if (limit > bottom) { | |
2215 if (verbose_low()) | |
2216 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, " | |
2217 "returning it ", task_num, curr_region); | |
2218 return curr_region; | |
2219 } else { | |
2220 tmp_guarantee_CM( limit == bottom, | |
2221 "the region limit should be at bottom" ); | |
2222 if (verbose_low()) | |
2223 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, " | |
2224 "returning NULL", task_num, curr_region); | |
2225 // we return NULL and the caller should try calling | |
2226 // claim_region() again. | |
2227 return NULL; | |
2228 } | |
2229 } else { | |
2230 guarantee( _finger > finger, "the finger should have moved forward" ); | |
2231 if (verbose_low()) | |
2232 gclog_or_tty->print_cr("[%d] somebody else moved the finger, " | |
2233 "global finger = "PTR_FORMAT", " | |
2234 "our finger = "PTR_FORMAT, | |
2235 task_num, _finger, finger); | |
2236 | |
2237 // read it again | |
2238 finger = _finger; | |
2239 } | |
2240 } | |
2241 | |
2242 return NULL; | |
2243 } | |
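// claim_region() above is a lock-free claiming loop: read the global
// finger, compute the end of the region it points into, and CAS the
// finger to that end. The winner owns the region; a loser's CAS fails
// because someone moved the finger, so it re-reads and retries. A
// minimal sketch over fixed-size regions (std::atomic in place of
// Atomic::cmpxchg_ptr; the layout is illustrative):

#include <atomic>
#include <cstddef>

const size_t kRegionWords = 1024;   // fixed region size, for the sketch

// Returns the start of the claimed region, or heap_words if the heap
// range has been exhausted.
size_t claim_region_sketch(std::atomic<size_t>& finger, size_t heap_words) {
  size_t cur = finger.load(std::memory_order_acquire);
  while (cur < heap_words) {
    size_t region_start = cur - (cur % kRegionWords);
    size_t region_end   = region_start + kRegionWords;
    // The winner advances the finger past this region and owns it.
    if (finger.compare_exchange_strong(cur, region_end,
                                       std::memory_order_acq_rel)) {
      return region_start;
    }
    // The failed CAS reloaded cur: somebody else moved the finger; retry.
  }
  return heap_words;
}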
2244 | |
2245 void ConcurrentMark::oops_do(OopClosure* cl) { | |
2246 if (_markStack.size() > 0 && verbose_low()) | |
2247 gclog_or_tty->print_cr("[global] scanning the global marking stack, " | |
2248 "size = %d", _markStack.size()); | |
2249 // we first iterate over the contents of the mark stack... | |
2250 _markStack.oops_do(cl); | |
2251 | |
2252 for (int i = 0; i < (int)_max_task_num; ++i) { | |
2253 OopTaskQueue* queue = _task_queues->queue((int)i); | |
2254 | |
2255 if (queue->size() > 0 && verbose_low()) | |
2256 gclog_or_tty->print_cr("[global] scanning task queue of task %d, " | |
2257 "size = %d", i, queue->size()); | |
2258 | |
2259 // ...then over the contents of the all the task queues. | |
2260 queue->oops_do(cl); | |
2261 } | |
2262 | |
2263 // finally, invalidate any entries in the region stack that | |
2264 // point into the collection set | |
2265 if (_regionStack.invalidate_entries_into_cset()) { | |
2266 // otherwise, any gray objects copied during the evacuation pause | |
2267 // might not be visited. | |
2268 guarantee( _should_gray_objects, "invariant" ); | |
2269 } | |
2270 } | |
2271 | |
2272 void ConcurrentMark::clear_marking_state() { | |
2273 _markStack.setEmpty(); | |
2274 _markStack.clear_overflow(); | |
2275 _regionStack.setEmpty(); | |
2276 _regionStack.clear_overflow(); | |
2277 clear_has_overflown(); | |
2278 _finger = _heap_start; | |
2279 | |
2280 for (int i = 0; i < (int)_max_task_num; ++i) { | |
2281 OopTaskQueue* queue = _task_queues->queue(i); | |
2282 queue->set_empty(); | |
2283 } | |
2284 } | |
2285 | |
2286 void ConcurrentMark::print_stats() { | |
2287 if (verbose_stats()) { | |
2288 gclog_or_tty->print_cr("---------------------------------------------------------------------"); | |
2289 for (size_t i = 0; i < _active_tasks; ++i) { | |
2290 _tasks[i]->print_stats(); | |
2291 gclog_or_tty->print_cr("---------------------------------------------------------------------"); | |
2292 } | |
2293 } | |
2294 } | |
2295 | |
2296 class CSMarkOopClosure: public OopClosure { | |
2297 friend class CSMarkBitMapClosure; | |
2298 | |
2299 G1CollectedHeap* _g1h; | |
2300 CMBitMap* _bm; | |
2301 ConcurrentMark* _cm; | |
2302 oop* _ms; | |
2303 jint* _array_ind_stack; | |
2304 int _ms_size; | |
2305 int _ms_ind; | |
2306 int _array_increment; | |
2307 | |
2308 bool push(oop obj, int arr_ind = 0) { | |
2309 if (_ms_ind == _ms_size) { | |
2310 gclog_or_tty->print_cr("Mark stack is full."); | |
2311 return false; | |
2312 } | |
2313 _ms[_ms_ind] = obj; | |
2314 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind; | |
2315 _ms_ind++; | |
2316 return true; | |
2317 } | |
2318 | |
2319 oop pop() { | |
2320 if (_ms_ind == 0) return NULL; | |
2321 else { | |
2322 _ms_ind--; | |
2323 return _ms[_ms_ind]; | |
2324 } | |
2325 } | |
2326 | |
2327 bool drain() { | |
2328 while (_ms_ind > 0) { | |
2329 oop obj = pop(); | |
2330 assert(obj != NULL, "Since index was non-zero."); | |
2331 if (obj->is_objArray()) { | |
2332 jint arr_ind = _array_ind_stack[_ms_ind]; | |
2333 objArrayOop aobj = objArrayOop(obj); | |
2334 jint len = aobj->length(); | |
2335 jint next_arr_ind = arr_ind + _array_increment; | |
2336 if (next_arr_ind < len) { | |
2337 push(obj, next_arr_ind); | |
2338 } | |
2339 // Now process this portion of this one. | |
2340 int lim = MIN2(next_arr_ind, len); | |
2341 assert(!UseCompressedOops, "This needs to be fixed"); | |
2342 for (int j = arr_ind; j < lim; j++) { | |
2343 do_oop(aobj->obj_at_addr<oop>(j)); | |
2344 } | |
2345 | |
2346 } else { | |
2347 obj->oop_iterate(this); | |
2348 } | |
2349 if (abort()) return false; | |
2350 } | |
2351 return true; | |
2352 } | |
2353 | |
2354 public: | |
2355 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) : | |
2356 _g1h(G1CollectedHeap::heap()), | |
2357 _cm(cm), | |
2358 _bm(cm->nextMarkBitMap()), | |
2359 _ms_size(ms_size), _ms_ind(0), | |
2360 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)), | |
2361 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)), | |
2362 _array_increment(MAX2(ms_size/8, 16)) | |
2363 {} | |
2364 | |
2365 ~CSMarkOopClosure() { | |
2366 FREE_C_HEAP_ARRAY(oop, _ms); | |
2367 FREE_C_HEAP_ARRAY(jint, _array_ind_stack); | |
2368 } | |
2369 | |
2370 void do_oop(narrowOop* p) { | |
2371 guarantee(false, "NYI"); | |
2372 } | |
2373 | |
2374 void do_oop(oop* p) { | |
2375 oop obj = *p; | |
2376 if (obj == NULL) return; | |
2377 if (obj->is_forwarded()) { | |
2378 // If the object has already been forwarded, we have to make sure | |
2379 // that it's marked. So follow the forwarding pointer. Note that | |
2380 // this does the right thing for self-forwarding pointers in the | |
2381 // evacuation failure case. | |
2382 obj = obj->forwardee(); | |
2383 } | |
2384 HeapRegion* hr = _g1h->heap_region_containing(obj); | |
2385 if (hr != NULL) { | |
2386 if (hr->in_collection_set()) { | |
2387 if (_g1h->is_obj_ill(obj)) { | |
2388 _bm->mark((HeapWord*)obj); | |
2389 if (!push(obj)) { | |
2390 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed."); | |
2391 set_abort(); | |
2392 } | |
2393 } | |
2394 } else { | |
2395 // Outside the collection set; we need to gray it | |
2396 _cm->deal_with_reference(obj); | |
2397 } | |
2398 } | |
2399 } | |
2400 }; | |
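// CSMarkOopClosure scans large object arrays a slice at a time: it
// re-pushes the array with the next start index before scanning the
// current _array_increment elements, so one huge array cannot blow up
// the mark stack. A stand-alone sketch of that chunking discipline
// (illustrative types, int arrays in place of objArrays):

#include <algorithm>
#include <cstdio>
#include <stack>
#include <utility>
#include <vector>

typedef std::pair<const std::vector<int>*, int> Chunk;  // (array, start)

void drain_chunked(std::stack<Chunk>& work, int increment) {
  while (!work.empty()) {
    Chunk c = work.top(); work.pop();
    int len  = (int) c.first->size();
    int next = c.second + increment;
    if (next < len)
      work.push(Chunk(c.first, next));   // remainder goes back on the stack
    int lim = std::min(next, len);
    for (int j = c.second; j < lim; j++) // scan only this slice now
      std::printf("visit %d\n", (*c.first)[j]);
  }
}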
2401 | |
2402 class CSMarkBitMapClosure: public BitMapClosure { | |
2403 G1CollectedHeap* _g1h; | |
2404 CMBitMap* _bitMap; | |
2405 ConcurrentMark* _cm; | |
2406 CSMarkOopClosure _oop_cl; | |
2407 public: | |
2408 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) : | |
2409 _g1h(G1CollectedHeap::heap()), | |
2410 _bitMap(cm->nextMarkBitMap()), | |
2411 _oop_cl(cm, ms_size) | |
2412 {} | |
2413 | |
2414 ~CSMarkBitMapClosure() {} | |
2415 | |
2416 bool do_bit(size_t offset) { | |
2417 // convert offset into a HeapWord* | |
2418 HeapWord* addr = _bitMap->offsetToHeapWord(offset); | |
2419 assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(), | |
2420 "address out of range"); | |
2421 assert(_bitMap->isMarked(addr), "tautology"); | |
2422 oop obj = oop(addr); | |
2423 if (!obj->is_forwarded()) { | |
2424 if (!_oop_cl.push(obj)) return false; | |
2425 if (!_oop_cl.drain()) return false; | |
2426 } | |
2427 // Otherwise... | |
2428 return true; | |
2429 } | |
2430 }; | |
2431 | |
2432 | |
2433 class CompleteMarkingInCSHRClosure: public HeapRegionClosure { | |
2434 CMBitMap* _bm; | |
2435 CSMarkBitMapClosure _bit_cl; | |
2436 enum SomePrivateConstants { | |
2437 MSSize = 1000 | |
2438 }; | |
2439 bool _completed; | |
2440 public: | |
2441 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) : | |
2442 _bm(cm->nextMarkBitMap()), | |
2443 _bit_cl(cm, MSSize), | |
2444 _completed(true) | |
2445 {} | |
2446 | |
2447 ~CompleteMarkingInCSHRClosure() {} | |
2448 | |
2449 bool doHeapRegion(HeapRegion* r) { | |
2450 if (!r->evacuation_failed()) { | |
2451 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start()); | |
2452 if (!mr.is_empty()) { | |
2453 if (!_bm->iterate(&_bit_cl, mr)) { | |
2454 _completed = false; | |
2455 return true; | |
2456 } | |
2457 } | |
2458 } | |
2459 return false; | |
2460 } | |
2461 | |
2462 bool completed() { return _completed; } | |
2463 }; | |
2464 | |
2465 class ClearMarksInHRClosure: public HeapRegionClosure { | |
2466 CMBitMap* _bm; | |
2467 public: | |
2468 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { } | |
2469 | |
2470 bool doHeapRegion(HeapRegion* r) { | |
2471 if (!r->used_region().is_empty() && !r->evacuation_failed()) { | |
2472 MemRegion usedMR = r->used_region(); | |
2473 _bm->clearRange(usedMR); | |
2474 } | |
2475 return false; | |
2476 } | |
2477 }; | |
2478 | |
2479 void ConcurrentMark::complete_marking_in_collection_set() { | |
2480 G1CollectedHeap* g1h = G1CollectedHeap::heap(); | |
2481 | |
2482 if (!g1h->mark_in_progress()) { | |
2483 g1h->g1_policy()->record_mark_closure_time(0.0); | |
2484 return; | |
2485 } | |
2486 | |
2487 int i = 1; | |
2488 double start = os::elapsedTime(); | |
2489 while (true) { | |
2490 i++; | |
2491 CompleteMarkingInCSHRClosure cmplt(this); | |
2492 g1h->collection_set_iterate(&cmplt); | |
2493 if (cmplt.completed()) break; | |
2494 } | |
2495 double end_time = os::elapsedTime(); | |
2496 double elapsed_time_ms = (end_time - start) * 1000.0; | |
2497 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms); | |
2498 if (PrintGCDetails) { | |
2499 gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms); | |
2500 } | |
2501 | |
2502 ClearMarksInHRClosure clr(nextMarkBitMap()); | |
2503 g1h->collection_set_iterate(&clr); | |
2504 } | |
2505 | |
2506 // The next two methods deal with the following optimisation. Some | |
2507 // objects are gray by being marked and located above the finger. If | |
2508 // they are copied, during an evacuation pause, below the finger then | |
2509 // they need to be pushed on the stack. The observation is that, if | |
2510 // there are no regions in the collection set located above the | |
2511 // finger, then the above cannot happen, hence we do not need to | |
2512 // explicitly gray any objects when copying them to below the | |
2513 // finger. The global stack will be scanned to ensure that, if it | |
2514 // points to objects being copied, it will update their | |
2515 // location. There is a tricky situation with the gray objects in | |
2516 // the region stack that are being copied, however. See the comment in | |
2517 // newCSet(). | |
2518 | |
2519 void ConcurrentMark::newCSet() { | |
2520 if (!concurrent_marking_in_progress()) | |
2521 // nothing to do if marking is not in progress | |
2522 return; | |
2523 | |
2524 // find what the lowest finger is among the global and local fingers | |
2525 _min_finger = _finger; | |
2526 for (int i = 0; i < (int)_max_task_num; ++i) { | |
2527 CMTask* task = _tasks[i]; | |
2528 HeapWord* task_finger = task->finger(); | |
2529 if (task_finger != NULL && task_finger < _min_finger) | |
2530 _min_finger = task_finger; | |
2531 } | |
2532 | |
2533 _should_gray_objects = false; | |
2534 | |
2535 // This fixes a very subtle and frustrating bug. It might be the case | |
2536 // that, during an evacuation pause, heap regions that contain | |
2537 // objects that are gray (by being in regions contained in the | |
2538 // region stack) are included in the collection set. Since such gray | |
2539 // objects will be moved, and because it's not easy to redirect | |
2540 // region stack entries to point to a new location (because objects | |
2541 // in one region might be scattered to multiple regions after they | |
2542 // are copied), one option is to ensure that all marked objects | |
2543 // copied during a pause are pushed on the stack. Notice, however, | |
2544 // that this problem can only happen when the region stack is not | |
2545 // empty during an evacuation pause. So, we make the fix a bit less | |
2546 // conservative and ensure that regions are pushed on the stack, | |
2547 // irrespective of whether all collection set regions are below the | |
2548 // finger, if the region stack is not empty. This is expected to be | |
2549 // a rare case, so I don't think it's necessary to be smarter about it. | |
2550 if (!region_stack_empty()) | |
2551 _should_gray_objects = true; | |
2552 } | |
2553 | |
2554 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) { | |
2555 if (!concurrent_marking_in_progress()) | |
2556 return; | |
2557 | |
2558 HeapWord* region_end = hr->end(); | |
2559 if (region_end > _min_finger) | |
2560 _should_gray_objects = true; | |
2561 } | |
2562 | |
2563 void ConcurrentMark::disable_co_trackers() { | |
2564 if (has_aborted()) { | |
2565 if (_cleanup_co_tracker.enabled()) | |
2566 _cleanup_co_tracker.disable(); | |
2567 for (int i = 0; i < (int)_max_task_num; ++i) { | |
2568 CMTask* task = _tasks[i]; | |
2569 if (task->co_tracker_enabled()) | |
2570 task->disable_co_tracker(); | |
2571 } | |
2572 } else { | |
2573 guarantee( !_cleanup_co_tracker.enabled(), "invariant" ); | |
2574 for (int i = 0; i < (int)_max_task_num; ++i) { | |
2575 CMTask* task = _tasks[i]; | |
2576 guarantee( !task->co_tracker_enabled(), "invariant" ); | |
2577 } | |
2578 } | |
2579 } | |
2580 | |
2581 // abandon current marking iteration due to a Full GC | |
2582 void ConcurrentMark::abort() { | |
2583 // If we're not marking, nothing to do. | |
2584 if (!G1ConcMark) return; | |
2585 | |
2586 // Clear all marks to force marking thread to do nothing | |
2587 _nextMarkBitMap->clearAll(); | |
2588 // Empty mark stack | |
2589 clear_marking_state(); | |
2590 for (int i = 0; i < (int)_max_task_num; ++i) | |
2591 _tasks[i]->clear_region_fields(); | |
2592 _has_aborted = true; | |
2593 | |
2594 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); | |
2595 satb_mq_set.abandon_partial_marking(); | |
2596 satb_mq_set.set_active_all_threads(false); | |
2597 } | |
2598 | |
2599 static void print_ms_time_info(const char* prefix, const char* name, | |
2600 NumberSeq& ns) { | |
2601 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", | |
2602 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); | |
2603 if (ns.num() > 0) { | |
2604 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", | |
2605 prefix, ns.sd(), ns.maximum()); | |
2606 } | |
2607 } | |
2608 | |
2609 void ConcurrentMark::print_summary_info() { | |
2610 gclog_or_tty->print_cr(" Concurrent marking:"); | |
2611 print_ms_time_info(" ", "init marks", _init_times); | |
2612 print_ms_time_info(" ", "remarks", _remark_times); | |
2613 { | |
2614 print_ms_time_info(" ", "final marks", _remark_mark_times); | |
2615 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); | |
2616 | |
2617 } | |
2618 print_ms_time_info(" ", "cleanups", _cleanup_times); | |
2619 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", | |
2620 _total_counting_time, | |
2621 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / | |
2622 (double)_cleanup_times.num() | |
2623 : 0.0)); | |
2624 if (G1ScrubRemSets) { | |
2625 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", | |
2626 _total_rs_scrub_time, | |
2627 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / | |
2628 (double)_cleanup_times.num() | |
2629 : 0.0)); | |
2630 } | |
2631 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", | |
2632 (_init_times.sum() + _remark_times.sum() + | |
2633 _cleanup_times.sum())/1000.0); | |
2634 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " | |
2635 "(%8.2f s marking, %8.2f s counting).", | |
2636 cmThread()->vtime_accum(), | |
2637 cmThread()->vtime_mark_accum(), | |
2638 cmThread()->vtime_count_accum()); | |
2639 } | |
2640 | |
2641 // Closures | |
2642 // XXX: there seems to be a lot of code duplication here; | |
2643 // should refactor and consolidate the shared code. | |
2644 | |
2645 // This closure is used to mark refs into the CMS generation in | |
2646 // the CMS bit map. Called at the first checkpoint. | |
2647 | |
2648 // We take a break if someone is trying to stop the world. | |
2649 bool ConcurrentMark::do_yield_check(int worker_i) { | |
2650 if (should_yield()) { | |
2651 if (worker_i == 0) | |
2652 _g1h->g1_policy()->record_concurrent_pause(); | |
2653 cmThread()->yield(); | |
2654 if (worker_i == 0) | |
2655 _g1h->g1_policy()->record_concurrent_pause_end(); | |
2656 return true; | |
2657 } else { | |
2658 return false; | |
2659 } | |
2660 } | |
2661 | |
2662 bool ConcurrentMark::should_yield() { | |
2663 return cmThread()->should_yield(); | |
2664 } | |
2665 | |
2666 bool ConcurrentMark::containing_card_is_marked(void* p) { | |
2667 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); | |
2668 return _card_bm.at(offset >> CardTableModRefBS::card_shift); | |
2669 } | |
2670 | |
2671 bool ConcurrentMark::containing_cards_are_marked(void* start, | |
2672 void* last) { | |
2673 return | |
2674 containing_card_is_marked(start) && | |
2675 containing_card_is_marked(last); | |
2676 } | |
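// The card arithmetic above: an address's byte offset from the start
// of the reserved heap, shifted right by the card size, gives its
// card index. With HotSpot's usual 512-byte cards (card_shift == 9),
// bytes 0..511 fall on card 0, bytes 512..1023 on card 1, and so on.
// A worked sketch (kCardShift is assumed, not read from HotSpot):

#include <cstddef>

const int kCardShift = 9;   // 512-byte cards, the typical HotSpot value

size_t card_index(const void* p, const void* heap_start) {
  size_t offset = (size_t) ((const char*) p - (const char*) heap_start);
  return offset >> kCardShift;
}

// A range is "covered" in containing_cards_are_marked() iff the cards
// of both its endpoints are marked; e.g. with heap_start == 0x1000,
// card_index((void*)0x11ff, (void*)0x1000) == 0 and
// card_index((void*)0x1200, (void*)0x1000) == 1.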
2677 | |
2678 #ifndef PRODUCT | |
2679 // for debugging purposes | |
2680 void ConcurrentMark::print_finger() { | |
2681 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, | |
2682 _heap_start, _heap_end, _finger); | |
2683 for (int i = 0; i < (int) _max_task_num; ++i) { | |
2684 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); | |
2685 } | |
2686 gclog_or_tty->print_cr(""); | |
2687 } | |
2688 #endif | |
2689 | |
2690 // Closure for iteration over bitmaps | |
2691 class CMBitMapClosure : public BitMapClosure { | |
2692 private: | |
2693 // the bitmap that is being iterated over | |
2694 CMBitMap* _nextMarkBitMap; | |
2695 ConcurrentMark* _cm; | |
2696 CMTask* _task; | |
2697 // true if we're scanning a heap region claimed by the task (so that | |
2698 // we move the finger along), false if we're not, i.e. currently when | |
2699 // scanning a heap region popped from the region stack (so that we | |
2700 // do not move the task finger along; it'd be a mistake if we did so). | |
2701 bool _scanning_heap_region; | |
2702 | |
2703 public: | |
2704 CMBitMapClosure(CMTask *task, | |
2705 ConcurrentMark* cm, | |
2706 CMBitMap* nextMarkBitMap) | |
2707 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } | |
2708 | |
2709 void set_scanning_heap_region(bool scanning_heap_region) { | |
2710 _scanning_heap_region = scanning_heap_region; | |
2711 } | |
2712 | |
2713 bool do_bit(size_t offset) { | |
2714 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); | |
2715 tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" ); | |
2716 tmp_guarantee_CM( addr < _cm->finger(), "invariant" ); | |
2717 | |
2718 if (_scanning_heap_region) { | |
2719 statsOnly( _task->increase_objs_found_on_bitmap() ); | |
2720 tmp_guarantee_CM( addr >= _task->finger(), "invariant" ); | |
2721 // We move that task's local finger along. | |
2722 _task->move_finger_to(addr); | |
2723 } else { | |
2724 // We move the task's region finger along. | |
2725 _task->move_region_finger_to(addr); | |
2726 } | |
2727 | |
2728 _task->scan_object(oop(addr)); | |
2729 // we only partially drain the local queue and global stack | |
2730 _task->drain_local_queue(true); | |
2731 _task->drain_global_stack(true); | |
2732 | |
2733 // if the has_aborted flag has been raised, we need to bail out of | |
2734 // the iteration | |
2735 return !_task->has_aborted(); | |
2736 } | |
2737 }; | |
2738 | |
2739 // Closure for iterating over objects, currently only used for | |
2740 // processing SATB buffers. | |
2741 class CMObjectClosure : public ObjectClosure { | |
2742 private: | |
2743 CMTask* _task; | |
2744 | |
2745 public: | |
2746 void do_object(oop obj) { | |
2747 _task->deal_with_reference(obj); | |
2748 } | |
2749 | |
2750 CMObjectClosure(CMTask* task) : _task(task) { } | |
2751 }; | |
2752 | |
2753 // Closure for iterating over object fields | |
2754 class CMOopClosure : public OopClosure { | |
2755 private: | |
2756 G1CollectedHeap* _g1h; | |
2757 ConcurrentMark* _cm; | |
2758 CMTask* _task; | |
2759 | |
2760 public: | |
2761 void do_oop(narrowOop* p) { | |
2762 guarantee(false, "NYI"); | |
2763 } | |
2764 | |
2765 void do_oop(oop* p) { | |
2766 tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" ); | |
2767 | |
2768 oop obj = *p; | |
2769 if (_cm->verbose_high()) | |
2770 gclog_or_tty->print_cr("[%d] we're looking at location " | |
2771 "*"PTR_FORMAT" = "PTR_FORMAT, | |
2772 _task->task_id(), p, (void*) obj); | |
2773 _task->deal_with_reference(obj); | |
2774 } | |
2775 | |
2776 CMOopClosure(G1CollectedHeap* g1h, | |
2777 ConcurrentMark* cm, | |
2778 CMTask* task) | |
2779 : _g1h(g1h), _cm(cm), _task(task) { } | |
2780 }; | |
2781 | |
2782 void CMTask::setup_for_region(HeapRegion* hr) { | |
2783 tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(), | |
2784 "claim_region() should have filtered out continues humongous regions" ); | |
2785 | |
2786 if (_cm->verbose_low()) | |
2787 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT, | |
2788 _task_id, hr); | |
2789 | |
2790 _curr_region = hr; | |
2791 _finger = hr->bottom(); | |
2792 update_region_limit(); | |
2793 } | |
2794 | |
2795 void CMTask::update_region_limit() { | |
2796 HeapRegion* hr = _curr_region; | |
2797 HeapWord* bottom = hr->bottom(); | |
2798 HeapWord* limit = hr->next_top_at_mark_start(); | |
2799 | |
2800 if (limit == bottom) { | |
2801 if (_cm->verbose_low()) | |
2802 gclog_or_tty->print_cr("[%d] found an empty region " | |
2803 "["PTR_FORMAT", "PTR_FORMAT")", | |
2804 _task_id, bottom, limit); | |
2805 // The region was collected underneath our feet. | |
2806 // We set the finger to bottom to ensure that the bitmap | |
2807 // iteration that will follow this will not do anything. | |
2808 // (this is not a condition that holds when we set the region up, | |
2809 // as the region is not supposed to be empty in the first place) | |
2810 _finger = bottom; | |
2811 } else if (limit >= _region_limit) { | |
2812 tmp_guarantee_CM( limit >= _finger, "peace of mind" ); | |
2813 } else { | |
2814 tmp_guarantee_CM( limit < _region_limit, "only way to get here" ); | |
2815 // This can happen under some pretty unusual circumstances. An | |
2816 // evacuation pause empties the region underneath our feet (NTAMS | |
2817 // at bottom). We then do some allocation in the region (NTAMS | |
2818 // stays at bottom), followed by the region being used as a GC | |
2819 // alloc region (NTAMS will move to top() and the objects | |
2820 // originally below it will be grayed). All objects now marked in | |
2821 // the region are explicitly grayed, if below the global finger, | |
2822 // and we do not need in fact to scan anything else. So, we simply | |
2823 // set _finger to be limit to ensure that the bitmap iteration | |
2824 // doesn't do anything. | |
2825 _finger = limit; | |
2826 } | |
2827 | |
2828 _region_limit = limit; | |
2829 } | |
2830 | |
2831 void CMTask::giveup_current_region() { | |
2832 tmp_guarantee_CM( _curr_region != NULL, "invariant" ); | |
2833 if (_cm->verbose_low()) | |
2834 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT, | |
2835 _task_id, _curr_region); | |
2836 clear_region_fields(); | |
2837 } | |
2838 | |
2839 void CMTask::clear_region_fields() { | |
2840 // Values for these three fields that indicate that we're not | |
2841 // holding on to a region. | |
2842 _curr_region = NULL; | |
2843 _finger = NULL; | |
2844 _region_limit = NULL; | |
2845 | |
2846 _region_finger = NULL; | |
2847 } | |
2848 | |
2849 void CMTask::reset(CMBitMap* nextMarkBitMap) { | |
2850 guarantee( nextMarkBitMap != NULL, "invariant" ); | |
2851 | |
2852 if (_cm->verbose_low()) | |
2853 gclog_or_tty->print_cr("[%d] resetting", _task_id); | |
2854 | |
2855 _nextMarkBitMap = nextMarkBitMap; | |
2856 clear_region_fields(); | |
2857 | |
2858 _calls = 0; | |
2859 _elapsed_time_ms = 0.0; | |
2860 _termination_time_ms = 0.0; | |
2861 _termination_start_time_ms = 0.0; | |
2862 | |
2863 #if _MARKING_STATS_ | |
2864 _local_pushes = 0; | |
2865 _local_pops = 0; | |
2866 _local_max_size = 0; | |
2867 _objs_scanned = 0; | |
2868 _global_pushes = 0; | |
2869 _global_pops = 0; | |
2870 _global_max_size = 0; | |
2871 _global_transfers_to = 0; | |
2872 _global_transfers_from = 0; | |
2873 _region_stack_pops = 0; | |
2874 _regions_claimed = 0; | |
2875 _objs_found_on_bitmap = 0; | |
2876 _satb_buffers_processed = 0; | |
2877 _steal_attempts = 0; | |
2878 _steals = 0; | |
2879 _aborted = 0; | |
2880 _aborted_overflow = 0; | |
2881 _aborted_cm_aborted = 0; | |
2882 _aborted_yield = 0; | |
2883 _aborted_timed_out = 0; | |
2884 _aborted_satb = 0; | |
2885 _aborted_termination = 0; | |
2886 #endif // _MARKING_STATS_ | |
2887 } | |
2888 | |
2889 bool CMTask::should_exit_termination() { | |
2890 regular_clock_call(); | |
2891 // This is called when we are in the termination protocol. We should | |
2892 // quit if, for some reason, this task wants to abort or the global | |
2893 // stack is not empty (this means that we can get work from it). | |
2894 return !_cm->mark_stack_empty() || has_aborted(); | |
2895 } | |
2896 | |
2897 // This determines whether the method below will check both the local | |
2898 // and global fingers when determining whether to push on the stack a | |
2899 // gray object (value 1) or whether it will only check the global one | |
2900 // (value 0). The tradeoffs are that the former will be a bit more | |
2901 // accurate and possibly push less on the stack, but it might also be | |
2902 // a little bit slower. | |
2903 | |
2904 #define _CHECK_BOTH_FINGERS_ 1 | |
2905 | |
2906 void CMTask::deal_with_reference(oop obj) { | |
2907 if (_cm->verbose_high()) | |
2908 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT, | |
2909 _task_id, (void*) obj); | |
2910 | |
2911 ++_refs_reached; | |
2912 | |
2913 HeapWord* objAddr = (HeapWord*) obj; | |
2914 if (_g1h->is_in_g1_reserved(objAddr)) { | |
2915 tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" ); | |
2916 HeapRegion* hr = _g1h->heap_region_containing(obj); | |
2917 if (_g1h->is_obj_ill(obj, hr)) { | |
2918 if (_cm->verbose_high()) | |
2919 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked", | |
2920 _task_id, (void*) obj); | |
2921 | |
2922 // we need to mark it first | |
2923 if (_nextMarkBitMap->parMark(objAddr)) { | |
2924 // No OrderAccess::store_load() is needed. It is implicit in the | |
2925 // CAS done in parMark(objAddr) above | |
2926 HeapWord* global_finger = _cm->finger(); | |
2927 | |
2928 #if _CHECK_BOTH_FINGERS_ | |
2929 // we will check both the local and global fingers | |
2930 | |
2931 if (_finger != NULL && objAddr < _finger) { | |
2932 if (_cm->verbose_high()) | |
2933 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), " | |
2934 "pushing it", _task_id, _finger); | |
2935 push(obj); | |
2936 } else if (_curr_region != NULL && objAddr < _region_limit) { | |
2937 // do nothing | |
2938 } else if (objAddr < global_finger) { | |
2939 // Notice that the global finger might be moving forward | |
2940 // concurrently. This is not a problem. In the worst case, we | |
2941 // mark the object while it is above the global finger and, by | |
2942 // the time we read the global finger, it has moved forward | |
2943 // past this object. In this case, the object will probably | |
2944 // be visited when a task is scanning the region and will also | |
2945 // be pushed on the stack. So, some duplicate work, but no | |
2946 // correctness problems. | |
2947 | |
2948 if (_cm->verbose_high()) | |
2949 gclog_or_tty->print_cr("[%d] below the global finger " | |
2950 "("PTR_FORMAT"), pushing it", | |
2951 _task_id, global_finger); | |
2952 push(obj); | |
2953 } else { | |
2954 // do nothing | |
2955 } | |
2956 #else // _CHECK_BOTH_FINGERS_ | |
2957 // we will only check the global finger | |
2958 | |
2959 if (objAddr < global_finger) { | |
2960 // see long comment above | |
2961 | |
2962 if (_cm->verbose_high()) | |
2963 gclog_or_tty->print_cr("[%d] below the global finger " | |
2964 "("PTR_FORMAT"), pushing it", | |
2965 _task_id, global_finger); | |
2966 push(obj); | |
2967 } | |
2968 #endif // _CHECK_BOTH_FINGERS_ | |
2969 } | |
2970 } | |
2971 } | |
2972 } | |
2973 | |
2974 void CMTask::push(oop obj) { | |
2975 HeapWord* objAddr = (HeapWord*) obj; | |
2976 tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" ); | |
2977 tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" ); | |
2978 tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" ); | |
2979 | |
2980 if (_cm->verbose_high()) | |
2981 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj); | |
2982 | |
2983 if (!_task_queue->push(obj)) { | |
2984 // The local task queue looks full. We need to push some entries | |
2985 // to the global stack. | |
2986 | |
2987 if (_cm->verbose_medium()) | |
2988 gclog_or_tty->print_cr("[%d] task queue overflow, " | |
2989 "moving entries to the global stack", | |
2990 _task_id); | |
2991 move_entries_to_global_stack(); | |
2992 | |
2993 // this should succeed since, even if we overflow the global | |
2994 // stack, we should have definitely removed some entries from the | |
2995 // local queue. So, there must be space on it. | |
2996 bool success = _task_queue->push(obj); | |
2997 tmp_guarantee_CM( success, "invariant" ); | |
2998 } | |
2999 | |
3000 statsOnly( int tmp_size = _task_queue->size(); | |
3001 if (tmp_size > _local_max_size) | |
3002 _local_max_size = tmp_size; | |
3003 ++_local_pushes ); | |
3004 } | |
3005 | |
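// ---------------------------------------------------------------------------
// Illustrative sketch (not HotSpot code): push() above implements "spill on
// overflow" -- when the bounded local queue rejects a push, a chunk of
// entries is moved to the global stack (unbounded here, bounded in reality),
// which is guaranteed to make room locally. All names and sizes below are
// hypothetical, and a single-threaded std::deque stands in for the real
// lock-free task queue.

#include <cstddef>
#include <deque>
#include <vector>

namespace spill_sketch {

const std::size_t kLocalCapacity = 4;  // tiny, to make spilling visible
const std::size_t kTransferChunk = 2;  // entries moved per spill

struct Queues {
  std::deque<int>  local_q;            // stands in for CMTaskQueue
  std::vector<int> global_stack;       // stands in for the global mark stack

  void push(int v) {
    if (local_q.size() >= kLocalCapacity) {
      // Local queue looks full: move a chunk of entries to the global
      // stack, mirroring move_entries_to_global_stack().
      for (std::size_t i = 0; i < kTransferChunk && !local_q.empty(); ++i) {
        global_stack.push_back(local_q.front());
        local_q.pop_front();
      }
    }
    local_q.push_back(v);              // must succeed: we just made room
  }
};

} // namespace spill_sketch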
3006 void CMTask::reached_limit() { | |
3007 tmp_guarantee_CM( _words_scanned >= _words_scanned_limit || | |
3008 _refs_reached >= _refs_reached_limit, | |
3009 "shouldn't have been called otherwise" ); | |
3010 regular_clock_call(); | |
3011 } | |
3012 | |
3013 void CMTask::regular_clock_call() { | |
3014 if (has_aborted()) | |
3015 return; | |
3016 | |
3017 // First, we need to recalculate the words scanned and refs reached | |
3018 // limits for the next clock call. | |
3019 recalculate_limits(); | |
3020 | |
3021 // During the regular clock call we do the following | |
3022 | |
3023 // (1) If an overflow has been flagged, then we abort. | |
3024 if (_cm->has_overflown()) { | |
3025 set_has_aborted(); | |
3026 return; | |
3027 } | |
3028 | |
3029 // If we are not concurrent (i.e. we're doing remark) we don't need | |
3030 // to check anything else. The other steps are only needed during | |
3031 // the concurrent marking phase. | |
3032 if (!concurrent()) | |
3033 return; | |
3034 | |
3035 // (2) If marking has been aborted for Full GC, then we also abort. | |
3036 if (_cm->has_aborted()) { | |
3037 set_has_aborted(); | |
3038 statsOnly( ++_aborted_cm_aborted ); | |
3039 return; | |
3040 } | |
3041 | |
3042 double curr_time_ms = os::elapsedVTime() * 1000.0; | |
3043 | |
3044 // (3) If marking stats are enabled, then we update the step history. | |
3045 #if _MARKING_STATS_ | |
3046 if (_words_scanned >= _words_scanned_limit) | |
3047 ++_clock_due_to_scanning; | |
3048 if (_refs_reached >= _refs_reached_limit) | |
3049 ++_clock_due_to_marking; | |
3050 | |
3051 double last_interval_ms = curr_time_ms - _interval_start_time_ms; | |
3052 _interval_start_time_ms = curr_time_ms; | |
3053 _all_clock_intervals_ms.add(last_interval_ms); | |
3054 | |
3055 if (_cm->verbose_medium()) { | |
3056 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, " | |
3057 "scanned = %d%s, refs reached = %d%s", | |
3058 _task_id, last_interval_ms, | |
3059 _words_scanned, | |
3060 (_words_scanned >= _words_scanned_limit) ? " (*)" : "", | |
3061 _refs_reached, | |
3062 (_refs_reached >= _refs_reached_limit) ? " (*)" : ""); | |
3063 } | |
3064 #endif // _MARKING_STATS_ | |
3065 | |
3066 // (4) We check whether we should yield. If we have to, then we abort. | |
3067 if (_cm->should_yield()) { | |
3068 // We should yield. To do this we abort the task. The caller is | |
3069 // responsible for yielding. | |
3070 set_has_aborted(); | |
3071 statsOnly( ++_aborted_yield ); | |
3072 return; | |
3073 } | |
3074 | |
3075 // (5) We check whether we've reached our time quota. If we have, | |
3076 // then we abort. | |
3077 double elapsed_time_ms = curr_time_ms - _start_time_ms; | |
3078 if (elapsed_time_ms > _time_target_ms) { | |
3079 set_has_aborted(); | |
3080 _has_aborted_timed_out = true; | |
3081 statsOnly( ++_aborted_timed_out ); | |
3082 return; | |
3083 } | |
3084 | |
3085 // (6) Finally, we check whether there are enough completed SATB | |
3086 // buffers available for processing. If there are, we abort. | |
3087 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); | |
3088 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { | |
3089 if (_cm->verbose_low()) | |
3090 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers", | |
3091 _task_id); | |
3092 // we do need to process SATB buffers, we'll abort and restart | |
3093 // the marking task to do so | |
3094 set_has_aborted(); | |
3095 statsOnly( ++_aborted_satb ); | |
3096 return; | |
3097 } | |
3098 } | |
3099 | |
3100 void CMTask::recalculate_limits() { | |
3101 _real_words_scanned_limit = _words_scanned + words_scanned_period; | |
3102 _words_scanned_limit = _real_words_scanned_limit; | |
3103 | |
3104 _real_refs_reached_limit = _refs_reached + refs_reached_period; | |
3105 _refs_reached_limit = _real_refs_reached_limit; | |
3106 } | |
3107 | |
3108 void CMTask::decrease_limits() { | |
3109 // This is called when we believe that we're going to do an infrequent | |
3110 // operation which will increase the per-byte scanned cost (i.e. move | |
3111 // entries to/from the global stack). It basically tries to decrease the | |
3112 // scanning limit so that the clock is called earlier. | |
3113 | |
3114 if (_cm->verbose_medium()) | |
3115 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id); | |
3116 | |
3117 _words_scanned_limit = _real_words_scanned_limit - | |
3118 3 * words_scanned_period / 4; | |
3119 _refs_reached_limit = _real_refs_reached_limit - | |
3120 3 * refs_reached_period / 4; | |
3121 } | |
3122 | |
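// ---------------------------------------------------------------------------
// Illustrative sketch (not HotSpot code): the work-based clock scheme used by
// regular_clock_call() / recalculate_limits() / decrease_limits(). Reading
// the virtual time on every object would be too expensive, so cheap work
// counters are checked against limits instead, and an expensive operation
// pulls the next limit closer so the clock fires earlier. The names and the
// period below are hypothetical.

#include <cstddef>

namespace clock_sketch {

const std::size_t kWordsPeriod = 12 * 1024;  // words scanned between ticks

struct WorkClock {
  std::size_t words_scanned;
  std::size_t real_limit;   // where the current period genuinely ends
  std::size_t limit;        // possibly pulled forward by decrease()

  WorkClock() : words_scanned(0) { recalculate(); }

  void recalculate() {
    real_limit = words_scanned + kWordsPeriod;
    limit = real_limit;
  }

  // After an infrequent, expensive operation: ask for the next clock call
  // three quarters of a period early (cf. decrease_limits()).
  void decrease() { limit = real_limit - 3 * kWordsPeriod / 4; }

  // Returns true when the caller should run its periodic checks (yield,
  // timeout, overflow, SATB buffers, ...) and then recalculate().
  bool tick(std::size_t words_just_scanned) {
    words_scanned += words_just_scanned;
    return words_scanned >= limit;
  }
};

} // namespace clock_sketch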
3123 void CMTask::move_entries_to_global_stack() { | |
3124 // local array where we'll store the entries that will be popped | |
3125 // from the local queue | |
3126 oop buffer[global_stack_transfer_size]; | |
3127 | |
3128 int n = 0; | |
3129 oop obj; | |
3130 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { | |
3131 buffer[n] = obj; | |
3132 ++n; | |
3133 } | |
3134 | |
3135 if (n > 0) { | |
3136 // we popped at least one entry from the local queue | |
3137 | |
3138 statsOnly( ++_global_transfers_to; _local_pops += n ); | |
3139 | |
3140 if (!_cm->mark_stack_push(buffer, n)) { | |
3141 if (_cm->verbose_low()) | |
3142 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id); | |
3143 set_has_aborted(); | |
3144 } else { | |
3145 // the transfer was successful | |
3146 | |
3147 if (_cm->verbose_medium()) | |
3148 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack", | |
3149 _task_id, n); | |
3150 statsOnly( int tmp_size = _cm->mark_stack_size(); | |
3151 if (tmp_size > _global_max_size) | |
3152 _global_max_size = tmp_size; | |
3153 _global_pushes += n ); | |
3154 } | |
3155 } | |
3156 | |
3157 // this operation was quite expensive, so decrease the limits | |
3158 decrease_limits(); | |
3159 } | |
3160 | |
3161 void CMTask::get_entries_from_global_stack() { | |
3162 // local array where we'll store the entries that will be popped | |
3163 // from the global stack. | |
3164 oop buffer[global_stack_transfer_size]; | |
3165 int n; | |
3166 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); | |
3167 tmp_guarantee_CM( n <= global_stack_transfer_size, | |
3168 "we should not pop more than the given limit" ); | |
3169 if (n > 0) { | |
3170 // yes, we did actually pop at least one entry | |
3171 | |
3172 statsOnly( ++_global_transfers_from; _global_pops += n ); | |
3173 if (_cm->verbose_medium()) | |
3174 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack", | |
3175 _task_id, n); | |
3176 for (int i = 0; i < n; ++i) { | |
3177 bool success = _task_queue->push(buffer[i]); | |
3178 // We only call this when the local queue is empty or under a | |
3179 // given target limit. So, we do not expect this push to fail. | |
3180 tmp_guarantee_CM( success, "invariant" ); | |
3181 } | |
3182 | |
3183 statsOnly( int tmp_size = _task_queue->size(); | |
3184 if (tmp_size > _local_max_size) | |
3185 _local_max_size = tmp_size; | |
3186 _local_pushes += n ); | |
3187 } | |
3188 | |
3189 // this operation was quite expensive, so decrease the limits | |
3190 decrease_limits(); | |
3191 } | |
3192 | |
3193 void CMTask::drain_local_queue(bool partially) { | |
3194 if (has_aborted()) | |
3195 return; | |
3196 | |
3197 // Decide what the target size is, depending on whether we're going to | |
3198 // drain it partially (so that other tasks can steal if they run out | |
3199 // of things to do) or totally (at the very end). | |
3200 size_t target_size; | |
3201 if (partially) | |
3202 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); | |
3203 else | |
3204 target_size = 0; | |
3205 | |
3206 if (_task_queue->size() > target_size) { | |
3207 if (_cm->verbose_high()) | |
3208 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d", | |
3209 _task_id, target_size); | |
3210 | |
3211 oop obj; | |
3212 bool ret = _task_queue->pop_local(obj); | |
3213 while (ret) { | |
3214 statsOnly( ++_local_pops ); | |
3215 | |
3216 if (_cm->verbose_high()) | |
3217 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id, | |
3218 (void*) obj); | |
3219 | |
3220 tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj), | |
3221 "invariant" ); | |
3222 | |
3223 scan_object(obj); | |
3224 | |
3225 if (_task_queue->size() <= target_size || has_aborted()) | |
3226 ret = false; | |
3227 else | |
3228 ret = _task_queue->pop_local(obj); | |
3229 } | |
3230 | |
3231 if (_cm->verbose_high()) | |
3232 gclog_or_tty->print_cr("[%d] drained local queue, size = %d", | |
3233 _task_id, _task_queue->size()); | |
3234 } | |
3235 } | |
3236 | |
3237 void CMTask::drain_global_stack(bool partially) { | |
3238 if (has_aborted()) | |
3239 return; | |
3240 | |
3241 // We have a policy to drain the local queue before we attempt to | |
3242 // drain the global stack. | |
3243 tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" ); | |
3244 | |
3245 // Decide what the target size is, depending on whether we're going to | |
3246 // drain it partially (so that other tasks can steal if they run out | |
3247 // of things to do) or totally (at the very end). Notice that, | |
3248 // because we move entries from the global stack in chunks or | |
3249 // because another task might be doing the same, we might in fact | |
3250 // drop below the target. But, this is not a problem. | |
3251 size_t target_size; | |
3252 if (partially) | |
3253 target_size = _cm->partial_mark_stack_size_target(); | |
3254 else | |
3255 target_size = 0; | |
3256 | |
3257 if (_cm->mark_stack_size() > target_size) { | |
3258 if (_cm->verbose_low()) | |
3259 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d", | |
3260 _task_id, target_size); | |
3261 | |
3262 while (!has_aborted() && _cm->mark_stack_size() > target_size) { | |
3263 get_entries_from_global_stack(); | |
3264 drain_local_queue(partially); | |
3265 } | |
3266 | |
3267 if (_cm->verbose_low()) | |
3268 gclog_or_tty->print_cr("[%d] drained global stack, size = %d", | |
3269 _task_id, _cm->mark_stack_size()); | |
3270 } | |
3271 } | |
3272 | |
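// ---------------------------------------------------------------------------
// Illustrative sketch (not HotSpot code): partial vs. total draining. While
// there may still be work elsewhere, a task drains only down to a non-zero
// target so that other tasks have something to steal; only at the very end
// does it drain to empty. The constants and the std::deque stand-in below
// are hypothetical.

#include <algorithm>
#include <cstddef>
#include <deque>

namespace drain_sketch {

const std::size_t kMaxElems         = 1024;  // stand-in for max_elems()
const std::size_t kDrainStackTarget = 64;    // stand-in for GCDrainStackTargetSize

inline void drain(std::deque<int>& q, bool partially) {
  // Mirrors drain_local_queue(): leave up to min(max/3, target) entries
  // behind on a partial drain, none on a total drain.
  std::size_t target =
      partially ? std::min(kMaxElems / 3, kDrainStackTarget) : 0;
  while (q.size() > target) {
    int obj = q.front();
    q.pop_front();
    (void) obj;  // a real task would scan_object(obj) here
  }
}

} // namespace drain_sketch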
3273 // The SATB queue code makes several assumptions about whether to call | |
3274 // the par or non-par versions of the methods. This is why some of the | |
3275 // code is replicated. We should really get rid of the single-threaded | |
3276 // version of the code to simplify things. | |
3277 void CMTask::drain_satb_buffers() { | |
3278 if (has_aborted()) | |
3279 return; | |
3280 | |
3281 // We set this so that the regular clock knows that we're in the | |
3282 // middle of draining buffers and doesn't set the abort flag when it | |
3283 // notices that SATB buffers are available for draining. It'd be | |
3284 // very counterproductive if it did that. :-) | |
3285 _draining_satb_buffers = true; | |
3286 | |
3287 CMObjectClosure oc(this); | |
3288 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); | |
3289 if (ParallelGCThreads > 0) | |
3290 satb_mq_set.set_par_closure(_task_id, &oc); | |
3291 else | |
3292 satb_mq_set.set_closure(&oc); | |
3293 | |
3294 // This keeps claiming and applying the closure to completed buffers | |
3295 // until we run out of buffers or we need to abort. | |
3296 if (ParallelGCThreads > 0) { | |
3297 while (!has_aborted() && | |
3298 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { | |
3299 if (_cm->verbose_medium()) | |
3300 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); | |
3301 statsOnly( ++_satb_buffers_processed ); | |
3302 regular_clock_call(); | |
3303 } | |
3304 } else { | |
3305 while (!has_aborted() && | |
3306 satb_mq_set.apply_closure_to_completed_buffer()) { | |
3307 if (_cm->verbose_medium()) | |
3308 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); | |
3309 statsOnly( ++_satb_buffers_processed ); | |
3310 regular_clock_call(); | |
3311 } | |
3312 } | |
3313 | |
3314 if (!concurrent() && !has_aborted()) { | |
3315 // We should only do this during remark. | |
3316 if (ParallelGCThreads > 0) | |
3317 satb_mq_set.par_iterate_closure_all_threads(_task_id); | |
3318 else | |
3319 satb_mq_set.iterate_closure_all_threads(); | |
3320 } | |
3321 | |
3322 _draining_satb_buffers = false; | |
3323 | |
3324 tmp_guarantee_CM( has_aborted() || | |
3325 concurrent() || | |
3326 satb_mq_set.completed_buffers_num() == 0, "invariant" ); | |
3327 | |
3328 if (ParallelGCThreads > 0) | |
3329 satb_mq_set.set_par_closure(_task_id, NULL); | |
3330 else | |
3331 satb_mq_set.set_closure(NULL); | |
3332 | |
3333 // again, this was a potentially expensive operation, decrease the | |
3334 // limits to get the regular clock call early | |
3335 decrease_limits(); | |
3336 } | |
3337 | |
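// ---------------------------------------------------------------------------
// Illustrative sketch (not HotSpot code): the SATB buffer life cycle that the
// draining loop above consumes. Mutator threads log pre-write field values
// into per-thread buffers; full buffers are moved onto a "completed" list,
// and marking tasks repeatedly claim one completed buffer at a time and mark
// every entry in it. All names below are hypothetical, and locking is
// omitted for brevity.

#include <cstddef>
#include <vector>

namespace satb_sketch {

typedef void* Oop;

struct SatbQueueSet {
  std::vector< std::vector<Oop> > completed;  // filled by mutator threads

  // Claim one completed buffer and apply 'mark' to each entry; returns
  // false when none remain -- the same loop shape as drain_satb_buffers().
  template <typename MarkFn>
  bool apply_closure_to_completed_buffer(MarkFn mark) {
    if (completed.empty()) return false;
    std::vector<Oop> buf = completed.back();
    completed.pop_back();
    for (std::size_t i = 0; i < buf.size(); ++i)
      mark(buf[i]);
    return true;
  }
};

} // namespace satb_sketch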
3338 void CMTask::drain_region_stack(BitMapClosure* bc) { | |
3339 if (has_aborted()) | |
3340 return; | |
3341 | |
3342 tmp_guarantee_CM( _region_finger == NULL, | |
3343 "it should be NULL when we're not scanning a region" ); | |
3344 | |
3345 if (!_cm->region_stack_empty()) { | |
3346 if (_cm->verbose_low()) | |
3347 gclog_or_tty->print_cr("[%d] draining region stack, size = %d", | |
3348 _task_id, _cm->region_stack_size()); | |
3349 | |
3350 MemRegion mr = _cm->region_stack_pop(); | |
3351 // it returns MemRegion() if the pop fails | |
3352 statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); | |
3353 | |
3354 while (mr.start() != NULL) { | |
3355 if (_cm->verbose_medium()) | |
3356 gclog_or_tty->print_cr("[%d] we are scanning region " | |
3357 "["PTR_FORMAT", "PTR_FORMAT")", | |
3358 _task_id, mr.start(), mr.end()); | |
3359 tmp_guarantee_CM( mr.end() <= _cm->finger(), | |
3360 "otherwise the region shouldn't be on the stack" ); | |
3361 assert(!mr.is_empty(), "Only non-empty regions live on the region stack"); | |
3362 if (_nextMarkBitMap->iterate(bc, mr)) { | |
3363 tmp_guarantee_CM( !has_aborted(), | |
3364 "cannot abort the task without aborting the bitmap iteration" ); | |
3365 | |
3366 // We finished iterating over the region without aborting. | |
3367 regular_clock_call(); | |
3368 if (has_aborted()) | |
3369 mr = MemRegion(); | |
3370 else { | |
3371 mr = _cm->region_stack_pop(); | |
3372 // it returns MemRegion() if the pop fails | |
3373 statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); | |
3374 } | |
3375 } else { | |
3376 guarantee( has_aborted(), "currently the only way to do so" ); | |
3377 | |
3378 // The only way to abort the bitmap iteration is to return | |
3379 // false from the do_bit() method. However, inside the | |
3380 // do_bit() method we move the _region_finger to point to the | |
3381 // object currently being looked at. So, if we bail out, we | |
3382 // have definitely set _region_finger to something non-null. | |
3383 guarantee( _region_finger != NULL, "invariant" ); | |
3384 | |
3385 // The iteration was actually aborted. So now _region_finger | |
3386 // points to the address of the object we last scanned. If we | |
3387 // leave it there, when we restart this task, we will rescan | |
3388 // the object. It is easy to avoid this. We move the finger by | |
3389 // enough to point to the next possible object header (the | |
3390 // bitmap knows by how much we need to move it as it knows its | |
3391 // granularity). | |
3392 MemRegion newRegion = | |
3393 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end()); | |
3394 | |
3395 if (!newRegion.is_empty()) { | |
3396 if (_cm->verbose_low()) { | |
3397 gclog_or_tty->print_cr("[%d] pushing unscanned region " | |
3398 "[" PTR_FORMAT "," PTR_FORMAT ") on region stack", | |
3399 _task_id, | |
3400 newRegion.start(), newRegion.end()); | |
3401 } | |
3402 // Now push the part of the region we didn't scan on the | |
3403 // region stack to make sure a task scans it later. | |
3404 _cm->region_stack_push(newRegion); | |
3405 } | |
3406 // break from while | |
3407 mr = MemRegion(); | |
3408 } | |
3409 _region_finger = NULL; | |
3410 } | |
3411 | |
3412 // We only push regions on the region stack during evacuation | |
3413 // pauses. So if we come out of the above iteration because the region | |
3414 // stack is empty, it will remain empty until the next yield | |
3415 // point. So, the guarantee below is safe. | |
3416 guarantee( has_aborted() || _cm->region_stack_empty(), | |
3417 "only way to exit the loop" ); | |
3418 | |
3419 if (_cm->verbose_low()) | |
3420 gclog_or_tty->print_cr("[%d] drained region stack, size = %d", | |
3421 _task_id, _cm->region_stack_size()); | |
3422 } | |
3423 } | |
3424 | |
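// ---------------------------------------------------------------------------
// Illustrative sketch (not HotSpot code): resuming an aborted region scan.
// When the bitmap walk above bails out at the region finger, the unscanned
// tail is pushed back starting one bitmap granule past the finger, so the
// last-visited object is not scanned twice. Names are hypothetical, and one
// heap word stands in for the bitmap's real granularity (cf. nextWord()).

#include <stdint.h>
#include <utility>

namespace resume_sketch {

typedef uintptr_t Addr;
const Addr kWordBytes = sizeof(void*);   // stand-in granularity

// Given an aborted scan that stopped at 'finger' inside [..., end), return
// the remainder [finger + granule, end) that must be re-queued; an empty
// pair (end, end) means nothing is left to push.
inline std::pair<Addr, Addr> unscanned_tail(Addr finger, Addr end) {
  Addr resume_at = finger + kWordBytes;
  if (resume_at >= end)
    return std::make_pair(end, end);
  return std::make_pair(resume_at, end);
}

} // namespace resume_sketch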
3425 void CMTask::print_stats() { | |
3426 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", | |
3427 _task_id, _calls); | |
3428 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", | |
3429 _elapsed_time_ms, _termination_time_ms); | |
3430 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", | |
3431 _step_times_ms.num(), _step_times_ms.avg(), | |
3432 _step_times_ms.sd()); | |
3433 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", | |
3434 _step_times_ms.maximum(), _step_times_ms.sum()); | |
3435 | |
3436 #if _MARKING_STATS_ | |
3437 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", | |
3438 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), | |
3439 _all_clock_intervals_ms.sd()); | |
3440 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", | |
3441 _all_clock_intervals_ms.maximum(), | |
3442 _all_clock_intervals_ms.sum()); | |
3443 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", | |
3444 _clock_due_to_scanning, _clock_due_to_marking); | |
3445 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", | |
3446 _objs_scanned, _objs_found_on_bitmap); | |
3447 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", | |
3448 _local_pushes, _local_pops, _local_max_size); | |
3449 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", | |
3450 _global_pushes, _global_pops, _global_max_size); | |
3451 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", | |
3452 _global_transfers_to, _global_transfers_from); | |
3453 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d", | |
3454 _regions_claimed, _region_stack_pops); | |
3455 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); | |
3456 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", | |
3457 _steal_attempts, _steals); | |
3458 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); | |
3459 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", | |
3460 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); | |
3461 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", | |
3462 _aborted_timed_out, _aborted_satb, _aborted_termination); | |
3463 #endif // _MARKING_STATS_ | |
3464 } | |
3465 | |
3466 /***************************************************************************** | |
3467 | |
3468 The do_marking_step(time_target_ms) method is the building block | |
3469 of the parallel marking framework. It can be called in parallel | |
3470 with other invocations of do_marking_step() on different tasks | |
3471 (but only one per task, obviously) and concurrently with the | |
3472 mutator threads, or during remark, hence it eliminates the need | |
3473 for two versions of the code. When called during remark, it will | |
3474 pick up from where the task left off during the concurrent marking | |
3475 phase. Interestingly, tasks are also claimable during evacuation | |
3476 pauses, since do_marking_step() ensures that it aborts before | |
3477 it needs to yield. | |
3478 | |
3479 The data structures that it uses to do marking work are the | |
3480 following: | |
3481 | |
3482 (1) Marking Bitmap. If there are gray objects that appear only | |
3483 on the bitmap (this happens either when dealing with an overflow | |
3484 or when the initial marking phase has simply marked the roots | |
3485 and didn't push them on the stack), then tasks claim heap | |
3486 regions whose bitmap they then scan to find gray objects. A | |
3487 global finger indicates where the end of the last claimed region | |
3488 is. A local finger indicates how far into the region a task has | |
3489 scanned. The two fingers are used to determine how to gray an | |
3490 object (i.e. whether simply marking it is OK, as it will be | |
3491 visited by a task in the future, or whether it needs to be also | |
3492 pushed on a stack). | |
3493 | |
3494 (2) Local Queue. The local queue of the task which is accessed | |
3495 reasonably efficiently by the task. Other tasks can steal from | |
3496 it when they run out of work. Throughout the marking phase, a | |
3497 task attempts to keep its local queue short but not totally | |
3498 empty, so that entries are available for stealing by other | |
3499 tasks. Only when there is no more work will a task totally | |
3500 drain its local queue. | |
3501 | |
3502 (3) Global Mark Stack. This handles local queue overflow. During | |
3503 marking only sets of entries are moved between it and the local | |
3504 queues, as access to it requires a mutex, and finer-grained | |
3505 interaction with it might cause contention. If it | |
3506 overflows, then the marking phase should restart and iterate | |
3507 over the bitmap to identify gray objects. Throughout the marking | |
3508 phase, tasks attempt to keep the global mark stack at a small | |
3509 length but not totally empty, so that entries are available for | |
3510 popping by other tasks. Only when there is no more work will | |
3511 tasks totally drain the global mark stack. | |
3512 | |
3513 (4) Global Region Stack. Entries on it correspond to areas of | |
3514 the bitmap that need to be scanned since they contain gray | |
3515 objects. Pushes on the region stack only happen during | |
3516 evacuation pauses and typically correspond to areas covered by | |
3517 GC LABs. If it overflows, then the marking phase should restart | |
3518 and iterate over the bitmap to identify gray objects. Tasks will | |
3519 try to totally drain the region stack as soon as possible. | |
3520 | |
3521 (5) SATB Buffer Queue. This is where completed SATB buffers are | |
3522 made available. Buffers are regularly removed from this queue | |
3523 and scanned for roots, so that the queue doesn't get too | |
3524 long. During remark, all completed buffers are processed, as | |
3525 well as the filled-in parts of any uncompleted buffers. | |
3526 | |
3527 The do_marking_step() method tries to abort when the time target | |
3528 has been reached. There are a few other cases when the | |
3529 do_marking_step() method also aborts: | |
3530 | |
3531 (1) When the marking phase has been aborted (after a Full GC). | |
3532 | |
3533 (2) When a global overflow (either on the global stack or the | |
3534 region stack) has been triggered. Before the task aborts, it | |
3535 will actually sync up with the other tasks to ensure that all | |
3536 the marking data structures (local queues, stacks, fingers etc.) | |
3537 are re-initialised so that when do_marking_step() completes, | |
3538 the marking phase can immediately restart. | |
3539 | |
3540 (3) When enough completed SATB buffers are available. The | |
3541 do_marking_step() method only tries to drain SATB buffers right | |
3542 at the beginning. So, if enough buffers are available, the | |
3543 marking step aborts and the SATB buffers are processed at | |
3544 the beginning of the next invocation. | |
3545 | |
3546 (4) To yield. When we have to yield, we abort and yield | |
3547 right at the end of do_marking_step(). This saves us from a lot | |
3548 of hassle as, by yielding we might allow a Full GC. If this | |
3549 happens then objects will be compacted underneath our feet, the | |
3550 heap might shrink, etc. We save checking for this by just | |
3551 aborting and doing the yield right at the end. | |
3552 | |
3553 From the above it follows that the do_marking_step() method should | |
3554 be called in a loop (or, otherwise, regularly) until it completes. | |
3555 | |
3556 If a marking step completes without its has_aborted() flag being | |
3557 true, it means it has completed the current marking phase (and | |
3558 also all other marking tasks have done so and have all synced up). | |
3559 | |
3560 A method called regular_clock_call() is invoked "regularly" (in | |
3561 sub ms intervals) throughout marking. It is this clock method that | |
3562 checks all the abort conditions which were mentioned above and | |
3563 decides when the task should abort. A work-based scheme is used to | |
3564 trigger this clock method: when the number of object words the | |
3565 marking phase has scanned or the number of references the marking | |
3566 phase has visited reach a given limit. Additional invocations of | |
3567 the clock method have been planted in a few other strategic places | |
3568 too. The initial reason for the clock method was to avoid calling | |
3569 vtime too regularly, as it is quite expensive. So, once it was in | |
3570 place, it was natural to piggy-back all the other conditions on it | |
3571 too and not constantly check them throughout the code. | |
3572 | |
3573 *****************************************************************************/ | |
3574 | |
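// ---------------------------------------------------------------------------
// Illustrative sketch (not HotSpot code): the calling convention described
// above. do_marking_step() may abort for any of the listed reasons, so the
// caller invokes it in a loop, yielding between attempts, until a step
// completes without aborting. The driver below is hypothetical.

namespace driver_sketch {

struct Task {
  bool aborted;
  Task() : aborted(false) {}
  // Stub: a real step would mark until done or until it decides to abort
  // (overflow, SATB buffers available, timeout, yield request).
  void do_marking_step(double /* time_target_ms */) { aborted = false; }
  bool has_aborted() const { return aborted; }
  void yield_if_needed() { /* cooperate with safepoints here */ }
};

inline void run_marking(Task& task, double slice_ms) {
  do {
    task.do_marking_step(slice_ms);  // may set the abort flag internally
    task.yield_if_needed();          // the caller yields, not the task
  } while (task.has_aborted());      // done only when a step didn't abort
}

} // namespace driver_sketch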
3575 void CMTask::do_marking_step(double time_target_ms) { | |
3576 guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" ); | |
3577 guarantee( concurrent() == _cm->concurrent(), "they should be the same" ); | |
3578 | |
3579 guarantee( concurrent() || _cm->region_stack_empty(), | |
3580 "the region stack should have been cleared before remark" ); | |
3581 guarantee( _region_finger == NULL, | |
3582 "this should be non-null only when a region is being scanned" ); | |
3583 | |
3584 G1CollectorPolicy* g1_policy = _g1h->g1_policy(); | |
3585 guarantee( _task_queues != NULL, "invariant" ); | |
3586 guarantee( _task_queue != NULL, "invariant" ); | |
3587 guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" ); | |
3588 | |
3589 guarantee( !_claimed, | |
3590 "only one thread should claim this task at any one time" ); | |
3591 | |
3592 // OK, this doesn't safeguard against all possible scenarios, as it is | |
3593 // possible for two threads to set the _claimed flag at the same | |
3594 // time. But it is only for debugging purposes anyway and it will | |
3595 // catch most problems. | |
3596 _claimed = true; | |
3597 | |
3598 _start_time_ms = os::elapsedVTime() * 1000.0; | |
3599 statsOnly( _interval_start_time_ms = _start_time_ms ); | |
3600 | |
3601 double diff_prediction_ms = | |
3602 g1_policy->get_new_prediction(&_marking_step_diffs_ms); | |
3603 _time_target_ms = time_target_ms - diff_prediction_ms; | |
3604 | |
3605 // set up the variables that are used in the work-based scheme to | |
3606 // call the regular clock method | |
3607 _words_scanned = 0; | |
3608 _refs_reached = 0; | |
3609 recalculate_limits(); | |
3610 | |
3611 // clear all flags | |
3612 clear_has_aborted(); | |
3613 _has_aborted_timed_out = false; | |
3614 _draining_satb_buffers = false; | |
3615 | |
3616 ++_calls; | |
3617 | |
3618 if (_cm->verbose_low()) | |
3619 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, " | |
3620 "target = %1.2lfms >>>>>>>>>>", | |
3621 _task_id, _calls, _time_target_ms); | |
3622 | |
3623 // Set up the bitmap and oop closures. Anything that uses them is | |
3624 // eventually called from this method, so it is OK to allocate these | |
3625 // statically. | |
3626 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); | |
3627 CMOopClosure oop_closure(_g1h, _cm, this); | |
3628 set_oop_closure(&oop_closure); | |
3629 | |
3630 if (_cm->has_overflown()) { | |
3631 // This can happen if the region stack or the mark stack overflows | |
3632 // during a GC pause and this task, after a yield point, | |
3633 // restarts. We have to abort as we need to get into the overflow | |
3634 // protocol which happens right at the end of this task. | |
3635 set_has_aborted(); | |
3636 } | |
3637 | |
3638 // First drain any available SATB buffers. After this, we will not | |
3639 // look at SATB buffers before the next invocation of this method. | |
3640 // If enough completed SATB buffers are queued up, the regular clock | |
3641 // will abort this task so that it restarts. | |
3642 drain_satb_buffers(); | |
3643 // ...then partially drain the local queue and the global stack | |
3644 drain_local_queue(true); | |
3645 drain_global_stack(true); | |
3646 | |
3647 // Then totally drain the region stack. We will not look at | |
3648 // it again before the next invocation of this method. Entries on | |
3649 // the region stack are only added during evacuation pauses, for | |
3650 // which we have to yield. When we do, we abort the task anyway so | |
3651 // it will look at the region stack again when it restarts. | |
3652 bitmap_closure.set_scanning_heap_region(false); | |
3653 drain_region_stack(&bitmap_closure); | |
3654 // ...then partially drain the local queue and the global stack | |
3655 drain_local_queue(true); | |
3656 drain_global_stack(true); | |
3657 | |
3658 do { | |
3659 if (!has_aborted() && _curr_region != NULL) { | |
3660 // This means that we're already holding on to a region. | |
3661 tmp_guarantee_CM( _finger != NULL, | |
3662 "if region is not NULL, then the finger " | |
3663 "should not be NULL either" ); | |
3664 | |
3665 // We might have restarted this task after an evacuation pause | |
3666 // which might have evacuated the region we're holding on to | |
3667 // underneath our feet. Let's read its limit again to make sure | |
3668 // that we do not iterate over a region of the heap that | |
3669 // contains garbage (update_region_limit() will also move | |
3670 // _finger to the start of the region if it is found empty). | |
3671 update_region_limit(); | |
3672 // We will start from _finger not from the start of the region, | |
3673 // as we might be restarting this task after aborting half-way | |
3674 // through scanning this region. In this case, _finger points to | |
3675 // the address where we last found a marked object. If this is a | |
3676 // fresh region, _finger points to start(). | |
3677 MemRegion mr = MemRegion(_finger, _region_limit); | |
3678 | |
3679 if (_cm->verbose_low()) | |
3680 gclog_or_tty->print_cr("[%d] we're scanning part " | |
3681 "["PTR_FORMAT", "PTR_FORMAT") " | |
3682 "of region "PTR_FORMAT, | |
3683 _task_id, _finger, _region_limit, _curr_region); | |
3684 | |
3685 // Let's iterate over the bitmap of the part of the | |
3686 // region that is left. | |
3687 bitmap_closure.set_scanning_heap_region(true); | |
3688 if (mr.is_empty() || | |
3689 _nextMarkBitMap->iterate(&bitmap_closure, mr)) { | |
3690 // We successfully completed iterating over the region. Now, | |
3691 // let's give up the region. | |
3692 giveup_current_region(); | |
3693 regular_clock_call(); | |
3694 } else { | |
3695 guarantee( has_aborted(), "currently the only way to do so" ); | |
3696 // The only way to abort the bitmap iteration is to return | |
3697 // false from the do_bit() method. However, inside the | |
3698 // do_bit() method we move the _finger to point to the | |
3699 // object currently being looked at. So, if we bail out, we | |
3700 // have definitely set _finger to something non-null. | |
3701 guarantee( _finger != NULL, "invariant" ); | |
3702 | |
3703 // Region iteration was actually aborted. So now _finger | |
3704 // points to the address of the object we last scanned. If we | |
3705 // leave it there, when we restart this task, we will rescan | |
3706 // the object. It is easy to avoid this. We move the finger by | |
3707 // enough to point to the next possible object header (the | |
3708 // bitmap knows by how much we need to move it as it knows its | |
3709 // granularity). | |
3710 move_finger_to(_nextMarkBitMap->nextWord(_finger)); | |
3711 } | |
3712 } | |
3713 // At this point we have either completed iterating over the | |
3714 // region we were holding on to, or we have aborted. | |
3715 | |
3716 // We then partially drain the local queue and the global stack. | |
3717 // (Do we really need this?) | |
3718 drain_local_queue(true); | |
3719 drain_global_stack(true); | |
3720 | |
3721 // Read the note on the claim_region() method on why it might | |
3722 // return NULL with potentially more regions available for | |
3723 // claiming and why we have to check out_of_regions() to determine | |
3724 // whether we're done or not. | |
3725 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { | |
3726 // We are going to try to claim a new region. We should have | |
3727 // given up on the previous one. | |
3728 tmp_guarantee_CM( _curr_region == NULL && | |
3729 _finger == NULL && | |
3730 _region_limit == NULL, "invariant" ); | |
3731 if (_cm->verbose_low()) | |
3732 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); | |
3733 HeapRegion* claimed_region = _cm->claim_region(_task_id); | |
3734 if (claimed_region != NULL) { | |
3735 // Yes, we managed to claim one | |
3736 statsOnly( ++_regions_claimed ); | |
3737 | |
3738 if (_cm->verbose_low()) | |
3739 gclog_or_tty->print_cr("[%d] we successfully claimed " | |
3740 "region "PTR_FORMAT, | |
3741 _task_id, claimed_region); | |
3742 | |
3743 setup_for_region(claimed_region); | |
3744 tmp_guarantee_CM( _curr_region == claimed_region, "invariant" ); | |
3745 } | |
3746 // It is important to call the regular clock here. It might take | |
3747 // a while to claim a region if, for example, we hit a large | |
3748 // block of empty regions. So we need to call the regular clock | |
3749 // method once round the loop to make sure it's called | |
3750 // frequently enough. | |
3751 regular_clock_call(); | |
3752 } | |
3753 | |
3754 if (!has_aborted() && _curr_region == NULL) { | |
3755 tmp_guarantee_CM( _cm->out_of_regions(), | |
3756 "at this point we should be out of regions" ); | |
3757 } | |
3758 } while ( _curr_region != NULL && !has_aborted()); | |
3759 | |
3760 if (!has_aborted()) { | |
3761 // We cannot check whether the global stack is empty, since other | |
343 | 3762 // tasks might be pushing objects to it concurrently. We also cannot |
3763 // check if the region stack is empty because if a thread is aborting | |
3764 // it can push a partially done region back. | |
3765 tmp_guarantee_CM( _cm->out_of_regions(), | |
342 | 3766 "at this point we should be out of regions" ); |
3767 | |
3768 if (_cm->verbose_low()) | |
3769 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id); | |
3770 | |
3771 // Try to reduce the number of available SATB buffers so that | |
3772 // remark has less work to do. | |
3773 drain_satb_buffers(); | |
3774 } | |
3775 | |
3776 // Since we've done everything else, we can now totally drain the | |
3777 // local queue and global stack. | |
3778 drain_local_queue(false); | |
3779 drain_global_stack(false); | |
3780 | |
3781 // Attempt at work stealing from other tasks' queues. | |
3782 if (!has_aborted()) { | |
3783 // We have not aborted. This means that we have finished all that | |
3784 // we could. Let's try to do some stealing... | |
3785 | |
3786 // We cannot check whether the global stack is empty, since other | |
343 | 3787 // tasks might be pushing objects to it concurrently. We also cannot |
3788 // check if the region stack is empty because if a thread is aborting | |
3789 // it can push a partially done region back. | |
342 | 3790 guarantee( _cm->out_of_regions() && |
3791 _task_queue->size() == 0, "only way to reach here" ); | |
3792 | |
3793 if (_cm->verbose_low()) | |
3794 gclog_or_tty->print_cr("[%d] starting to steal", _task_id); | |
3795 | |
3796 while (!has_aborted()) { | |
3797 oop obj; | |
3798 statsOnly( ++_steal_attempts ); | |
3799 | |
3800 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) { | |
3801 if (_cm->verbose_medium()) | |
3802 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully", | |
3803 _task_id, (void*) obj); | |
3804 | |
3805 statsOnly( ++_steals ); | |
3806 | |
3807 tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj), | |
3808 "any stolen object should be marked" ); | |
3809 scan_object(obj); | |
3810 | |
3811 // And since we're towards the end, let's totally drain the | |
3812 // local queue and global stack. | |
3813 drain_local_queue(false); | |
3814 drain_global_stack(false); | |
3815 } else { | |
3816 break; | |
3817 } | |
3818 } | |
3819 } | |
3820 | |
3821 // We still haven't aborted. Now, let's try to get into the | |
3822 // termination protocol. | |
3823 if (!has_aborted()) { | |
3824 // We cannot check whether the global stack is empty, since other | |
343 | 3825 // tasks might be concurrently pushing objects on it. We also cannot |
3826 // check if the region stack is empty because if a thread is aborting | |
3827 // it can push a partially done region back. | |
342 | 3828 guarantee( _cm->out_of_regions() && |
3829 _task_queue->size() == 0, "only way to reach here" ); | |
3830 | |
3831 if (_cm->verbose_low()) | |
3832 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id); | |
3833 | |
3834 _termination_start_time_ms = os::elapsedVTime() * 1000.0; | |
3835 // The CMTask class also extends the TerminatorTerminator class, | |
3836 // hence its should_exit_termination() method will also decide | |
3837 // whether to exit the termination protocol or not. | |
3838 bool finished = _cm->terminator()->offer_termination(this); | |
3839 double termination_end_time_ms = os::elapsedVTime() * 1000.0; | |
3840 _termination_time_ms += | |
3841 termination_end_time_ms - _termination_start_time_ms; | |
3842 | |
3843 if (finished) { | |
3844 // We're all done. | |
3845 | |
3846 if (_task_id == 0) { | |
3847 // let's allow task 0 to do this | |
3848 if (concurrent()) { | |
3849 guarantee( _cm->concurrent_marking_in_progress(), "invariant" ); | |
3850 // we need to set this to false before the next | |
3851 // safepoint. This way we ensure that the marking phase | |
3852 // doesn't observe any more heap expansions. | |
3853 _cm->clear_concurrent_marking_in_progress(); | |
3854 } | |
3855 } | |
3856 | |
3857 // We can now guarantee that the global stack is empty, since | |
3858 // all other tasks have finished. | |
3859 guarantee( _cm->out_of_regions() && | |
3860 _cm->region_stack_empty() && | |
3861 _cm->mark_stack_empty() && | |
3862 _task_queue->size() == 0 && | |
3863 !_cm->has_overflown() && | |
3864 !_cm->mark_stack_overflow() && | |
3865 !_cm->region_stack_overflow(), | |
3866 "only way to reach here" ); | |
3867 | |
3868 if (_cm->verbose_low()) | |
3869 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id); | |
3870 } else { | |
3871 // Apparently there's more work to do. Let's abort this task. The | |
3872 // caller will restart it and we can hopefully find more things to do. | |
3873 | |
3874 if (_cm->verbose_low()) | |
3875 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id); | |
3876 | |
3877 set_has_aborted(); | |
3878 statsOnly( ++_aborted_termination ); | |
3879 } | |
3880 } | |
3881 | |
3882 // Mainly for debugging purposes to make sure that a pointer to the | |
3883 // closure which was statically allocated in this frame doesn't | |
3884 // escape it by accident. | |
3885 set_oop_closure(NULL); | |
3886 double end_time_ms = os::elapsedVTime() * 1000.0; | |
3887 double elapsed_time_ms = end_time_ms - _start_time_ms; | |
3888 // Update the step history. | |
3889 _step_times_ms.add(elapsed_time_ms); | |
3890 | |
3891 if (has_aborted()) { | |
3892 // The task was aborted for some reason. | |
3893 | |
3894 statsOnly( ++_aborted ); | |
3895 | |
3896 if (_has_aborted_timed_out) { | |
3897 double diff_ms = elapsed_time_ms - _time_target_ms; | |
3898 // Keep statistics of how well we did with respect to hitting | |
3899 // our target only if we actually timed out (if we aborted for | |
3900 // other reasons, then the results might get skewed). | |
3901 _marking_step_diffs_ms.add(diff_ms); | |
3902 } | |
3903 | |
3904 if (_cm->has_overflown()) { | |
3905 // This is the interesting one. We aborted because a global | |
3906 // overflow was raised. This means we have to restart the | |
3907 // marking phase and start iterating over regions. However, in | |
3908 // order to do this we have to make sure that all tasks stop | |
3909 // what they are doing and re-initialise in a safe manner. We | |
3910 // will achieve this with the use of two barrier sync points. | |
3911 | |
3912 if (_cm->verbose_low()) | |
3913 gclog_or_tty->print_cr("[%d] detected overflow", _task_id); | |
3914 | |
3915 _cm->enter_first_sync_barrier(_task_id); | |
3916 // When we exit this sync barrier we know that all tasks have | |
3917 // stopped doing marking work. So, it's now safe to | |
3918 // re-initialise our data structures. At the end of this method, | |
3919 // task 0 will clear the global data structures. | |
3920 | |
3921 statsOnly( ++_aborted_overflow ); | |
3922 | |
3923 // We clear the local state of this task... | |
3924 clear_region_fields(); | |
3925 | |
3926 // ...and enter the second barrier. | |
3927 _cm->enter_second_sync_barrier(_task_id); | |
3928 // At this point everything has been re-initialised and we're | |
3929 // ready to restart. | |
3930 } | |
3931 | |
3932 if (_cm->verbose_low()) { | |
3933 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, " | |
3934 "elapsed = %1.2lfms <<<<<<<<<<", | |
3935 _task_id, _time_target_ms, elapsed_time_ms); | |
3936 if (_cm->has_aborted()) | |
3937 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========", | |
3938 _task_id); | |
3939 } | |
3940 } else { | |
3941 if (_cm->verbose_low()) | |
3942 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, " | |
3943 "elapsed = %1.2lfms <<<<<<<<<<", | |
3944 _task_id, _time_target_ms, elapsed_time_ms); | |
3945 } | |
3946 | |
3947 _claimed = false; | |
3948 } | |
3949 | |
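// ---------------------------------------------------------------------------
// Illustrative sketch (not HotSpot code): the two-barrier overflow protocol
// used in do_marking_step() above. The first barrier guarantees that every
// task has stopped marking before shared state is reset; the second
// guarantees the reset has finished before any task restarts. A minimal
// C++11 rendering with hypothetical names:

#include <condition_variable>
#include <mutex>

namespace barrier_sketch {

class Barrier {
  std::mutex mu_;
  std::condition_variable cv_;
  int parties_, waiting_, generation_;
 public:
  explicit Barrier(int parties)
      : parties_(parties), waiting_(0), generation_(0) {}
  void await() {
    std::unique_lock<std::mutex> lk(mu_);
    int gen = generation_;
    if (++waiting_ == parties_) {   // last task in: release everybody
      waiting_ = 0;
      ++generation_;
      cv_.notify_all();
    } else {
      cv_.wait(lk, [&] { return gen != generation_; });
    }
  }
};

// Every task calls this when a global overflow has been raised.
inline void handle_overflow(int task_id, Barrier& first, Barrier& second) {
  first.await();                    // all tasks have stopped marking work
  // ... each task clears its local state; one task (task 0 here, as a
  // stand-in for the real division of labour) also resets global state ...
  if (task_id == 0) { /* reset global marking data structures */ }
  second.await();                   // re-initialisation complete; restart
}

} // namespace barrier_sketch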
3950 CMTask::CMTask(int task_id, | |
3951 ConcurrentMark* cm, | |
3952 CMTaskQueue* task_queue, | |
3953 CMTaskQueueSet* task_queues) | |
3954 : _g1h(G1CollectedHeap::heap()), | |
3955 _co_tracker(G1CMGroup), | |
3956 _task_id(task_id), _cm(cm), | |
3957 _claimed(false), | |
3958 _nextMarkBitMap(NULL), _hash_seed(17), | |
3959 _task_queue(task_queue), | |
3960 _task_queues(task_queues), | |
3961 _oop_closure(NULL) { | |
3962 guarantee( task_queue != NULL, "invariant" ); | |
3963 guarantee( task_queues != NULL, "invariant" ); | |
3964 | |
3965 statsOnly( _clock_due_to_scanning = 0; | |
3966 _clock_due_to_marking = 0 ); | |
3967 | |
3968 _marking_step_diffs_ms.add(0.5); | |
3969 } |