comparison src/share/vm/gc_implementation/g1/concurrentMark.cpp @ 342:37f87013dfd8

6711316: Open source the Garbage-First garbage collector
Summary: First mercurial integration of the code for the Garbage-First garbage collector.
Reviewed-by: apetrusenko, iveresov, jmasa, sgoldman, tonyp, ysr
author ysr
date Thu, 05 Jun 2008 15:57:56 -0700
parents
children afc1ce1efe66
comparison: 189:0b27f3512f9e vs 342:37f87013dfd8
1 /*
2 * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
24
25 #include "incls/_precompiled.incl"
26 #include "incls/_concurrentMark.cpp.incl"
27
28 //
29 // Concurrent Mark (CM) Bit Map Wrapper
30
31 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
32 _bm((uintptr_t*)NULL,0),
33 _shifter(shifter) {
34 _bmStartWord = (HeapWord*)(rs.base());
35 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
36 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
37 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
38
39 guarantee(brs.is_reserved(), "couldn't allocate CM bit map");
40 // For now we'll just commit all of the bit map up front.
41 // Later on we'll try to be more parsimonious with swap.
42 guarantee(_virtual_space.initialize(brs, brs.size()),
43 "couldn't reserve backing store for CM bit map");
44 assert(_virtual_space.committed_size() == brs.size(),
45 "didn't reserve backing store for all of CM bit map?");
46 _bm.set_map((uintptr_t*)_virtual_space.low());
47 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
48 _bmWordSize, "inconsistency in bit map sizing");
49 _bm.set_size(_bmWordSize >> _shifter);
50 }
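// Illustrative sizing example (assumed numbers, not from the original source):
// the bitmap uses one bit per (1 << _shifter) heap words, so the backing
// store needs (_bmWordSize >> (_shifter + LogBitsPerByte)) bytes. For
// example, with a 1 GB reserved space, 8-byte HeapWords and _shifter == 0,
// that is 2^27 bits, i.e. roughly 16 MB -- about 1/64th of the space covered.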
51
52 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
53 HeapWord* limit) const {
54 // First we must round addr *up* to a possible object boundary.
55 addr = (HeapWord*)align_size_up((intptr_t)addr,
56 HeapWordSize << _shifter);
57 size_t addrOffset = heapWordToOffset(addr);
58 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
59 size_t limitOffset = heapWordToOffset(limit);
60 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
61 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
62 assert(nextAddr >= addr, "get_next_one postcondition");
63 assert(nextAddr == limit || isMarked(nextAddr),
64 "get_next_one postcondition");
65 return nextAddr;
66 }
67
68 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
69 HeapWord* limit) const {
70 size_t addrOffset = heapWordToOffset(addr);
71 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
72 size_t limitOffset = heapWordToOffset(limit);
73 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
74 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
75 assert(nextAddr >= addr, "get_next_one postcondition");
76 assert(nextAddr == limit || !isMarked(nextAddr),
77 "get_next_one postcondition");
78 return nextAddr;
79 }
80
81 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
82 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
83 return (int) (diff >> _shifter);
84 }
85
86 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
87 HeapWord* left = MAX2(_bmStartWord, mr.start());
88 HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
89 if (right > left) {
90 // Right-open interval [leftOffset, rightOffset).
91 return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
92 } else {
93 return true;
94 }
95 }
96
97 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
98 size_t from_start_index,
99 HeapWord* to_start_word,
100 size_t word_num) {
101 _bm.mostly_disjoint_range_union(from_bitmap,
102 from_start_index,
103 heapWordToOffset(to_start_word),
104 word_num);
105 }
106
107 #ifndef PRODUCT
108 bool CMBitMapRO::covers(ReservedSpace rs) const {
109 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
110 assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize,
111 "size inconsistency");
112 return _bmStartWord == (HeapWord*)(rs.base()) &&
113 _bmWordSize == rs.size()>>LogHeapWordSize;
114 }
115 #endif
116
117 void CMBitMap::clearAll() {
118 _bm.clear();
119 return;
120 }
121
122 void CMBitMap::markRange(MemRegion mr) {
123 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
124 assert(!mr.is_empty(), "unexpected empty region");
125 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
126 ((HeapWord *) mr.end())),
127 "markRange memory region end is not card aligned");
128 // convert address range into offset range
129 _bm.at_put_range(heapWordToOffset(mr.start()),
130 heapWordToOffset(mr.end()), true);
131 }
132
133 void CMBitMap::clearRange(MemRegion mr) {
134 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
135 assert(!mr.is_empty(), "unexpected empty region");
136 // convert address range into offset range
137 _bm.at_put_range(heapWordToOffset(mr.start()),
138 heapWordToOffset(mr.end()), false);
139 }
140
141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
142 HeapWord* end_addr) {
143 HeapWord* start = getNextMarkedWordAddress(addr);
144 start = MIN2(start, end_addr);
145 HeapWord* end = getNextUnmarkedWordAddress(start);
146 end = MIN2(end, end_addr);
147 assert(start <= end, "Consistency check");
148 MemRegion mr(start, end);
149 if (!mr.is_empty()) {
150 clearRange(mr);
151 }
152 return mr;
153 }
154
155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
156 _base(NULL), _cm(cm)
157 #ifdef ASSERT
158 , _drain_in_progress(false)
159 , _drain_in_progress_yields(false)
160 #endif
161 {}
162
163 void CMMarkStack::allocate(size_t size) {
164 _base = NEW_C_HEAP_ARRAY(oop, size);
165 if (_base == NULL)
166 vm_exit_during_initialization("Failed to allocate "
167 "CM region mark stack");
168 _index = 0;
169 // QQQQ cast ...
170 _capacity = (jint) size;
171 _oops_do_bound = -1;
172 NOT_PRODUCT(_max_depth = 0);
173 }
174
175 CMMarkStack::~CMMarkStack() {
176 if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
177 }
178
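// par_push() below is lock-free: it claims the next slot by CAS-ing _index
// forward and only then stores the entry; on CAS failure it retries, and
// when the stack is full it sets _overflow rather than blocking.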
179 void CMMarkStack::par_push(oop ptr) {
180 while (true) {
181 if (isFull()) {
182 _overflow = true;
183 return;
184 }
185 // Otherwise...
186 jint index = _index;
187 jint next_index = index+1;
188 jint res = Atomic::cmpxchg(next_index, &_index, index);
189 if (res == index) {
190 _base[index] = ptr;
191 // Note that we don't maintain this atomically. We could, but it
192 // doesn't seem necessary.
193 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
194 return;
195 }
196 // Otherwise, we need to try again.
197 }
198 }
199
200 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
201 while (true) {
202 if (isFull()) {
203 _overflow = true;
204 return;
205 }
206 // Otherwise...
207 jint index = _index;
208 jint next_index = index + n;
209 if (next_index > _capacity) {
210 _overflow = true;
211 return;
212 }
213 jint res = Atomic::cmpxchg(next_index, &_index, index);
214 if (res == index) {
215 for (int i = 0; i < n; i++) {
216 int ind = index + i;
217 assert(ind < _capacity, "By overflow test above.");
218 _base[ind] = ptr_arr[i];
219 }
220 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
221 return;
222 }
223 // Otherwise, we need to try again.
224 }
225 }
226
227
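// In contrast to the CAS-based par_push()/par_adjoin_arr() above, the array
// push/pop operations below serialize on ParGCRareEvent_lock; they are
// presumably rare enough that a mutex is acceptable here.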
228 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
229 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
230 jint start = _index;
231 jint next_index = start + n;
232 if (next_index > _capacity) {
233 _overflow = true;
234 return;
235 }
236 // Otherwise.
237 _index = next_index;
238 for (int i = 0; i < n; i++) {
239 int ind = start + i;
240 guarantee(ind < _capacity, "By overflow test above.");
241 _base[ind] = ptr_arr[i];
242 }
243 }
244
245
246 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
247 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
248 jint index = _index;
249 if (index == 0) {
250 *n = 0;
251 return false;
252 } else {
253 int k = MIN2(max, index);
254 jint new_ind = index - k;
255 for (int j = 0; j < k; j++) {
256 ptr_arr[j] = _base[new_ind + j];
257 }
258 _index = new_ind;
259 *n = k;
260 return true;
261 }
262 }
263
264
265 CMRegionStack::CMRegionStack() : _base(NULL) {}
266
267 void CMRegionStack::allocate(size_t size) {
268 _base = NEW_C_HEAP_ARRAY(MemRegion, size);
269 if (_base == NULL)
270 vm_exit_during_initialization("Failed to allocate "
271 "CM region mark stack");
272 _index = 0;
273 // QQQQ cast ...
274 _capacity = (jint) size;
275 }
276
277 CMRegionStack::~CMRegionStack() {
278 if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
279 }
280
281 void CMRegionStack::push(MemRegion mr) {
282 assert(mr.word_size() > 0, "Precondition");
283 while (true) {
284 if (isFull()) {
285 _overflow = true;
286 return;
287 }
288 // Otherwise...
289 jint index = _index;
290 jint next_index = index+1;
291 jint res = Atomic::cmpxchg(next_index, &_index, index);
292 if (res == index) {
293 _base[index] = mr;
294 return;
295 }
296 // Otherwise, we need to try again.
297 }
298 }
299
300 MemRegion CMRegionStack::pop() {
301 while (true) {
302 // Otherwise...
303 jint index = _index;
304
305 if (index == 0) {
306 return MemRegion();
307 }
308 jint next_index = index-1;
309 jint res = Atomic::cmpxchg(next_index, &_index, index);
310 if (res == index) {
311 MemRegion mr = _base[next_index];
312 if (mr.start() != NULL) {
313 tmp_guarantee_CM( mr.end() != NULL, "invariant" );
314 tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
315 return mr;
316 } else {
317 // that entry was invalidated... let's skip it
318 tmp_guarantee_CM( mr.end() == NULL, "invariant" );
319 }
320 }
321 // Otherwise, we need to try again.
322 }
323 }
324
325 bool CMRegionStack::invalidate_entries_into_cset() {
326 bool result = false;
327 G1CollectedHeap* g1h = G1CollectedHeap::heap();
328 for (int i = 0; i < _oops_do_bound; ++i) {
329 MemRegion mr = _base[i];
330 if (mr.start() != NULL) {
331 tmp_guarantee_CM( mr.end() != NULL, "invariant");
332 tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
333 HeapRegion* hr = g1h->heap_region_containing(mr.start());
334 tmp_guarantee_CM( hr != NULL, "invariant" );
335 if (hr->in_collection_set()) {
336 // The region points into the collection set
337 _base[i] = MemRegion();
338 result = true;
339 }
340 } else {
341 // that entry was invalidated... let's skip it
342 tmp_guarantee_CM( mr.end() == NULL, "invariant" );
343 }
344 }
345 return result;
346 }
347
348 template<class OopClosureClass>
349 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
350 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
351 || SafepointSynchronize::is_at_safepoint(),
352 "Drain recursion must be yield-safe.");
353 bool res = true;
354 debug_only(_drain_in_progress = true);
355 debug_only(_drain_in_progress_yields = yield_after);
356 while (!isEmpty()) {
357 oop newOop = pop();
358 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
359 assert(newOop->is_oop(), "Expected an oop");
360 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
361 "only grey objects on this stack");
362 // iterate over the oops in this oop, marking and pushing
363 // the ones in the G1 heap.
364 newOop->oop_iterate(cl);
365 if (yield_after && _cm->do_yield_check()) {
366 res = false; break;
367 }
368 }
369 debug_only(_drain_in_progress = false);
370 return res;
371 }
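// Illustrative use of drain() (hypothetical closure name; assumes a closure
// that marks and pushes the objects referenced by each popped entry):
//
//   SomeMarkAndPushClosure cl(...);   // hypothetical
//   bool finished = _markStack.drain(&cl, _nextMarkBitMap,
//                                    true /* yield_after */);
//   // If !finished we yielded; the caller is expected to drain again later.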
372
373 void CMMarkStack::oops_do(OopClosure* f) {
374 if (_index == 0) return;
375 assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
376 "Bound must be set.");
377 for (int i = 0; i < _oops_do_bound; i++) {
378 f->do_oop(&_base[i]);
379 }
380 _oops_do_bound = -1;
381 }
382
383 bool ConcurrentMark::not_yet_marked(oop obj) const {
384 return (_g1h->is_obj_ill(obj)
385 || (_g1h->is_in_permanent(obj)
386 && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
387 }
388
389 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
390 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
391 #endif // _MSC_VER
392
393 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
394 int max_regions) :
395 _markBitMap1(rs, MinObjAlignment - 1),
396 _markBitMap2(rs, MinObjAlignment - 1),
397
398 _parallel_marking_threads(0),
399 _sleep_factor(0.0),
400 _marking_task_overhead(1.0),
401 _cleanup_sleep_factor(0.0),
402 _cleanup_task_overhead(1.0),
403 _region_bm(max_regions, false /* in_resource_area*/),
404 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
405 CardTableModRefBS::card_shift,
406 false /* in_resource_area*/),
407 _prevMarkBitMap(&_markBitMap1),
408 _nextMarkBitMap(&_markBitMap2),
409 _at_least_one_mark_complete(false),
410
411 _markStack(this),
412 _regionStack(),
413 // _finger set in set_non_marking_state
414
415 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
416 // _active_tasks set in set_non_marking_state
417 // _tasks set inside the constructor
418 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
419 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
420
421 _has_overflown(false),
422 _concurrent(false),
423
424 // _verbose_level set below
425
426 _init_times(),
427 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
428 _cleanup_times(),
429 _total_counting_time(0.0),
430 _total_rs_scrub_time(0.0),
431
432 _parallel_workers(NULL),
433 _cleanup_co_tracker(G1CLGroup)
434 {
435 CMVerboseLevel verbose_level =
436 (CMVerboseLevel) G1MarkingVerboseLevel;
437 if (verbose_level < no_verbose)
438 verbose_level = no_verbose;
439 if (verbose_level > high_verbose)
440 verbose_level = high_verbose;
441 _verbose_level = verbose_level;
442
443 if (verbose_low())
444 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
445 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
446
447 _markStack.allocate(G1CMStackSize);
448 _regionStack.allocate(G1CMRegionStackSize);
449
450 // Create & start a ConcurrentMark thread.
451 if (G1ConcMark) {
452 _cmThread = new ConcurrentMarkThread(this);
453 assert(cmThread() != NULL, "CM Thread should have been created");
454 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
455 } else {
456 _cmThread = NULL;
457 }
458 _g1h = G1CollectedHeap::heap();
459 assert(CGC_lock != NULL, "Where's the CGC_lock?");
460 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
461 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
462
463 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
464 satb_qs.set_buffer_size(G1SATBLogBufferSize);
465
466 int size = (int) MAX2(ParallelGCThreads, (size_t)1);
467 _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
468 for (int i = 0 ; i < size; i++) {
469 _par_cleanup_thread_state[i] = new ParCleanupThreadState;
470 }
471
472 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
473 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
474
475 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
476 _active_tasks = _max_task_num;
477 for (int i = 0; i < (int) _max_task_num; ++i) {
478 CMTaskQueue* task_queue = new CMTaskQueue();
479 task_queue->initialize();
480 _task_queues->register_queue(i, task_queue);
481
482 _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
483 _accum_task_vtime[i] = 0.0;
484 }
485
486 if (ParallelMarkingThreads > ParallelGCThreads) {
487 vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
488 "than ParallelGCThreads.");
489 }
490 if (ParallelGCThreads == 0) {
491 // if we are not running with any parallel GC threads we will not
492 // spawn any marking threads either
493 _parallel_marking_threads = 0;
494 _sleep_factor = 0.0;
495 _marking_task_overhead = 1.0;
496 } else {
497 if (ParallelMarkingThreads > 0) {
498 // notice that ParallelMarkingThreads overrides G1MarkingOverheadPerc
499 // if both are set
500
501 _parallel_marking_threads = ParallelMarkingThreads;
502 _sleep_factor = 0.0;
503 _marking_task_overhead = 1.0;
504 } else if (G1MarkingOverheadPerc > 0) {
505 // we will calculate the number of parallel marking threads
506 // based on a target overhead with respect to the soft real-time
507 // goal
508
509 double marking_overhead = (double) G1MarkingOverheadPerc / 100.0;
510 double overall_cm_overhead =
511 (double) G1MaxPauseTimeMS * marking_overhead / (double) G1TimeSliceMS;
512 double cpu_ratio = 1.0 / (double) os::processor_count();
513 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
514 double marking_task_overhead =
515 overall_cm_overhead / marking_thread_num *
516 (double) os::processor_count();
517 double sleep_factor =
518 (1.0 - marking_task_overhead) / marking_task_overhead;
519
520 _parallel_marking_threads = (size_t) marking_thread_num;
521 _sleep_factor = sleep_factor;
522 _marking_task_overhead = marking_task_overhead;
523 } else {
524 _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
525 _sleep_factor = 0.0;
526 _marking_task_overhead = 1.0;
527 }
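// Illustrative numbers for the G1MarkingOverheadPerc branch above (assumed
// flag values, not defaults): with G1MarkingOverheadPerc = 10,
// G1MaxPauseTimeMS = 50, G1TimeSliceMS = 100 and 8 processors:
//   overall_cm_overhead   = 50 * 0.10 / 100        = 0.05
//   marking_thread_num    = ceil(0.05 / (1.0 / 8)) = ceil(0.4) = 1
//   marking_task_overhead = 0.05 / 1 * 8           = 0.4
//   sleep_factor          = (1 - 0.4) / 0.4        = 1.5
// i.e. a single marking thread that aims to be active ~40% of the time,
// sleeping for 1.5x the virtual time it just spent marking.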
528
529 if (parallel_marking_threads() > 1)
530 _cleanup_task_overhead = 1.0;
531 else
532 _cleanup_task_overhead = marking_task_overhead();
533 _cleanup_sleep_factor =
534 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
535
536 #if 0
537 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
538 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
539 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
540 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
541 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
542 #endif
543
544 guarantee( parallel_marking_threads() > 0, "peace of mind" );
545 _parallel_workers = new WorkGang("Parallel Marking Threads",
546 (int) parallel_marking_threads(), false, true);
547 if (_parallel_workers == NULL)
548 vm_exit_during_initialization("Failed necessary allocation.");
549 }
550
551 // so that the call below can read a sensible value
552 _heap_start = (HeapWord*) rs.base();
553 set_non_marking_state();
554 }
555
556 void ConcurrentMark::update_g1_committed(bool force) {
557 // If concurrent marking is not in progress, then we do not need to
558 // update _heap_end. This has a subtle and important
559 // side-effect. Imagine that two evacuation pauses happen between
560 // marking completion and remark. The first one can grow the
561 // heap (hence now the finger is below the heap end). Then, the
562 // second one could unnecessarily push regions on the region
563 // stack. This causes the invariant that the region stack is empty
564 // at the beginning of remark to be false. By ensuring that we do
565 // not observe heap expansions after marking is complete, then we do
566 // not have this problem.
567 if (!concurrent_marking_in_progress() && !force)
568 return;
569
570 MemRegion committed = _g1h->g1_committed();
571 tmp_guarantee_CM( committed.start() == _heap_start,
572 "start shouldn't change" );
573 HeapWord* new_end = committed.end();
574 if (new_end > _heap_end) {
575 // The heap has been expanded.
576
577 _heap_end = new_end;
578 }
579 // Notice that the heap can also shrink. However, this only happens
580 // during a Full GC (at least currently) and the entire marking
581 // phase will bail out and the task will not be restarted. So, let's
582 // do nothing.
583 }
584
585 void ConcurrentMark::reset() {
586 // Starting values for these two. This should be called in a STW
587 // phase. CM will be notified of any future g1_committed expansions
588 // at the end of evacuation pauses, when tasks are
589 // inactive.
590 MemRegion committed = _g1h->g1_committed();
591 _heap_start = committed.start();
592 _heap_end = committed.end();
593
594 guarantee( _heap_start != NULL &&
595 _heap_end != NULL &&
596 _heap_start < _heap_end, "heap bounds should look ok" );
597
598 // reset all the marking data structures and any necessary flags
599 clear_marking_state();
600
601 if (verbose_low())
602 gclog_or_tty->print_cr("[global] resetting");
603
604 // We do reset all of them, since different phases will use
605 // different numbers of active threads. So, it's easiest to have all
606 // of them ready.
607 for (int i = 0; i < (int) _max_task_num; ++i)
608 _tasks[i]->reset(_nextMarkBitMap);
609
610 // we need this to make sure that the flag is on during the evac
611 // pause with initial mark piggy-backed
612 set_concurrent_marking_in_progress();
613 }
614
615 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
616 guarantee( active_tasks <= _max_task_num, "we should not have more" );
617
618 _active_tasks = active_tasks;
619 // Need to update the three data structures below according to the
620 // number of active threads for this phase.
621 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
622 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
623 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
624
625 _concurrent = concurrent;
626 // We propagate this to all tasks, not just the active ones.
627 for (int i = 0; i < (int) _max_task_num; ++i)
628 _tasks[i]->set_concurrent(concurrent);
629
630 if (concurrent) {
631 set_concurrent_marking_in_progress();
632 } else {
633 // We currently assume that the concurrent flag has been set to
634 // false before we start remark. At this point we should also be
635 // in a STW phase.
636 guarantee( !concurrent_marking_in_progress(), "invariant" );
637 guarantee( _finger == _heap_end, "only way to get here" );
638 update_g1_committed(true);
639 }
640 }
641
642 void ConcurrentMark::set_non_marking_state() {
643 // We set the global marking state to some default values when we're
644 // not doing marking.
645 clear_marking_state();
646 _active_tasks = 0;
647 clear_concurrent_marking_in_progress();
648 }
649
650 ConcurrentMark::~ConcurrentMark() {
651 int size = (int) MAX2(ParallelGCThreads, (size_t)1);
652 for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
653 FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
654 _par_cleanup_thread_state);
655
656 for (int i = 0; i < (int) _max_task_num; ++i) {
657 delete _task_queues->queue(i);
658 delete _tasks[i];
659 }
660 delete _task_queues;
661 FREE_C_HEAP_ARRAY(CMTask*, _tasks);
662 }
663
664 // This closure is used to mark refs into the G1 heap
665 // from external roots in the CM bit map.
666 // Called at the first checkpoint.
667 //
668
669 #define PRINT_REACHABLE_AT_INITIAL_MARK 0
670 #if PRINT_REACHABLE_AT_INITIAL_MARK
671 static FILE* reachable_file = NULL;
672
673 class PrintReachableClosure: public OopsInGenClosure {
674 CMBitMap* _bm;
675 int _level;
676 public:
677 PrintReachableClosure(CMBitMap* bm) :
678 _bm(bm), _level(0) {
679 guarantee(reachable_file != NULL, "pre-condition");
680 }
681 void do_oop(oop* p) {
682 oop obj = *p;
683 HeapWord* obj_addr = (HeapWord*)obj;
684 if (obj == NULL) return;
685 fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n",
686 _level, p, (void*) obj, _bm->isMarked(obj_addr));
687 if (!_bm->isMarked(obj_addr)) {
688 _bm->mark(obj_addr);
689 _level++;
690 obj->oop_iterate(this);
691 _level--;
692 }
693 }
694 };
695 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
696
697 #define SEND_HEAP_DUMP_TO_FILE 0
698 #if SEND_HEAP_DUMP_TO_FILE
699 static FILE* heap_dump_file = NULL;
700 #endif // SEND_HEAP_DUMP_TO_FILE
701
702 void ConcurrentMark::clearNextBitmap() {
703 guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");
704
705 // clear the mark bitmap (no grey objects to start with).
706 // We need to do this in chunks and offer to yield in between
707 // each chunk.
708 HeapWord* start = _nextMarkBitMap->startWord();
709 HeapWord* end = _nextMarkBitMap->endWord();
710 HeapWord* cur = start;
711 size_t chunkSize = M;
712 while (cur < end) {
713 HeapWord* next = cur + chunkSize;
714 if (next > end)
715 next = end;
716 MemRegion mr(cur,next);
717 _nextMarkBitMap->clearRange(mr);
718 cur = next;
719 do_yield_check();
720 }
721 }
722
723 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
724 public:
725 bool doHeapRegion(HeapRegion* r) {
726 if (!r->continuesHumongous()) {
727 r->note_start_of_marking(true);
728 }
729 return false;
730 }
731 };
732
733 void ConcurrentMark::checkpointRootsInitialPre() {
734 G1CollectedHeap* g1h = G1CollectedHeap::heap();
735 G1CollectorPolicy* g1p = g1h->g1_policy();
736
737 _has_aborted = false;
738
739 // Find all the reachable objects...
740 #if PRINT_REACHABLE_AT_INITIAL_MARK
741 guarantee(reachable_file == NULL, "Protocol");
742 char fn_buf[100];
743 sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id());
744 reachable_file = fopen(fn_buf, "w");
745 // clear the mark bitmap (no grey objects to start with)
746 _nextMarkBitMap->clearAll();
747 PrintReachableClosure prcl(_nextMarkBitMap);
748 g1h->process_strong_roots(
749 false, // fake perm gen collection
750 SharedHeap::SO_AllClasses,
751 &prcl, // Regular roots
752 &prcl // Perm Gen Roots
753 );
754 // The root iteration above "consumed" dirty cards in the perm gen.
755 // Therefore, as a shortcut, we dirty all such cards.
756 g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false);
757 fclose(reachable_file);
758 reachable_file = NULL;
759 // clear the mark bitmap again.
760 _nextMarkBitMap->clearAll();
761 COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
762 COMPILER2_PRESENT(DerivedPointerTable::clear());
763 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
764
765 // Initialise marking structures. This has to be done in a STW phase.
766 reset();
767 }
768
769 class CMMarkRootsClosure: public OopsInGenClosure {
770 private:
771 ConcurrentMark* _cm;
772 G1CollectedHeap* _g1h;
773 bool _do_barrier;
774
775 public:
776 CMMarkRootsClosure(ConcurrentMark* cm,
777 G1CollectedHeap* g1h,
778 bool do_barrier) : _cm(cm), _g1h(g1h),
779 _do_barrier(do_barrier) { }
780
781 virtual void do_oop(narrowOop* p) {
782 guarantee(false, "NYI");
783 }
784
785 virtual void do_oop(oop* p) {
786 oop thisOop = *p;
787 if (thisOop != NULL) {
788 assert(thisOop->is_oop() || thisOop->mark() == NULL,
789 "expected an oop, possibly with mark word displaced");
790 HeapWord* addr = (HeapWord*)thisOop;
791 if (_g1h->is_in_g1_reserved(addr)) {
792 _cm->grayRoot(thisOop);
793 }
794 }
795 if (_do_barrier) {
796 assert(!_g1h->is_in_g1_reserved(p),
797 "Should be called on external roots");
798 do_barrier(p);
799 }
800 }
801 };
802
803 void ConcurrentMark::checkpointRootsInitialPost() {
804 G1CollectedHeap* g1h = G1CollectedHeap::heap();
805
806 // For each region note start of marking.
807 NoteStartOfMarkHRClosure startcl;
808 g1h->heap_region_iterate(&startcl);
809
810 // Start weak-reference discovery.
811 ReferenceProcessor* rp = g1h->ref_processor();
812 rp->verify_no_references_recorded();
813 rp->enable_discovery(); // enable ("weak") refs discovery
814
815 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
816 satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
817 satb_mq_set.set_active_all_threads(true);
818
819 // update_g1_committed() will be called at the end of an evac pause
820 // when marking is on. So, it's also called at the end of the
821 // initial-mark pause to update the heap end, if the heap expands
822 // during it. No need to call it here.
823
824 guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
825
826 size_t max_marking_threads =
827 MAX2((size_t) 1, parallel_marking_threads());
828 for (int i = 0; i < (int)_max_task_num; ++i) {
829 _tasks[i]->enable_co_tracker();
830 if (i < (int) max_marking_threads)
831 _tasks[i]->reset_co_tracker(marking_task_overhead());
832 else
833 _tasks[i]->reset_co_tracker(0.0);
834 }
835 }
836
837 // Checkpoint the roots into this generation from outside
838 // this generation. [Note this initial checkpoint need only
839 // be approximate -- we'll do a catch up phase subsequently.]
840 void ConcurrentMark::checkpointRootsInitial() {
841 assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
842 G1CollectedHeap* g1h = G1CollectedHeap::heap();
843
844 double start = os::elapsedTime();
845 GCOverheadReporter::recordSTWStart(start);
846
847 // If there has not been a GC[n-1] since last GC[n] cycle completed,
848 // precede our marking with a collection of all
849 // younger generations to keep floating garbage to a minimum.
850 // YSR: we won't do this for now -- it's an optimization to be
851 // done post-beta.
852
853 // YSR: ignoring weak refs for now; will do at bug fixing stage
854 // EVM: assert(discoveredRefsAreClear());
855
856
857 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
858 g1p->record_concurrent_mark_init_start();
859 checkpointRootsInitialPre();
860
861 // YSR: when concurrent precleaning is in place, we'll
862 // need to clear the cached card table here
863
864 ResourceMark rm;
865 HandleMark hm;
866
867 g1h->ensure_parsability(false);
868 g1h->perm_gen()->save_marks();
869
870 CMMarkRootsClosure notOlder(this, g1h, false);
871 CMMarkRootsClosure older(this, g1h, true);
872
873 g1h->set_marking_started();
874 g1h->rem_set()->prepare_for_younger_refs_iterate(false);
875
876 g1h->process_strong_roots(false, // fake perm gen collection
877 SharedHeap::SO_AllClasses,
878 &notOlder, // Regular roots
879 &older // Perm Gen Roots
880 );
881 checkpointRootsInitialPost();
882
883 // Statistics.
884 double end = os::elapsedTime();
885 _init_times.add((end - start) * 1000.0);
886 GCOverheadReporter::recordSTWEnd(end);
887
888 g1p->record_concurrent_mark_init_end();
889 }
890
891 /*
892 Notice that in the next two methods, we actually leave the STS
893 during the barrier sync and join it immediately afterwards. If we
894 do not do this, then the following deadlock can occur: one
895 thread could be in the barrier sync code, waiting for the other
896 thread to also sync up, whereas another one could be trying to
897 yield, while also waiting for the other threads to sync up too.
898
899 Because the thread that does the sync barrier has left the STS, it
900 is possible for it to be suspended while a Full GC or an evacuation
901 pause occurs. This is actually safe, since entering the sync
902 barrier is one of the last things do_marking_step() does, and it
903 doesn't manipulate any data structures afterwards.
904 */
905
906 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
907 if (verbose_low())
908 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
909
910 ConcurrentGCThread::stsLeave();
911 _first_overflow_barrier_sync.enter();
912 ConcurrentGCThread::stsJoin();
913 // at this point everyone should have synced up and not be doing any
914 // more work
915
916 if (verbose_low())
917 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
918
919 // let task 0 do this
920 if (task_num == 0) {
921 // task 0 is responsible for clearing the global data structures
922 clear_marking_state();
923
924 if (PrintGC) {
925 gclog_or_tty->date_stamp(PrintGCDateStamps);
926 gclog_or_tty->stamp(PrintGCTimeStamps);
927 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
928 }
929 }
930
931 // after this, each task should reset its own data structures and
932 // then go into the second barrier
933 }
934
935 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
936 if (verbose_low())
937 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
938
939 ConcurrentGCThread::stsLeave();
940 _second_overflow_barrier_sync.enter();
941 ConcurrentGCThread::stsJoin();
942 // at this point everything should be re-initialised and ready to go
943
944 if (verbose_low())
945 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
946 }
947
948 void ConcurrentMark::grayRoot(oop p) {
949 HeapWord* addr = (HeapWord*) p;
950 // We can't really check against _heap_start and _heap_end, since it
951 // is possible during an evacuation pause with piggy-backed
952 // initial-mark that the committed space is expanded during the
953 // pause without CM observing this change. So the assertion below
954 // is a bit conservative, but better than nothing.
955 tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
956 "address should be within the heap bounds" );
957
958 if (!_nextMarkBitMap->isMarked(addr))
959 _nextMarkBitMap->parMark(addr);
960 }
961
962 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
963 // The objects on the region have already been marked "in bulk" by
964 // the caller. We only need to decide whether to push the region on
965 // the region stack or not.
966
967 if (!concurrent_marking_in_progress() || !_should_gray_objects)
968 // We're done with marking and waiting for remark. We do not need to
969 // push anything else on the region stack.
970 return;
971
972 HeapWord* finger = _finger;
973
974 if (verbose_low())
975 gclog_or_tty->print_cr("[global] attempting to push "
976 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
977 PTR_FORMAT, mr.start(), mr.end(), finger);
978
979 if (mr.start() < finger) {
980 // The finger is always heap region aligned and it is not possible
981 // for mr to span heap regions.
982 tmp_guarantee_CM( mr.end() <= finger, "invariant" );
983
984 tmp_guarantee_CM( mr.start() <= mr.end() &&
985 _heap_start <= mr.start() &&
986 mr.end() <= _heap_end,
987 "region boundaries should fall within the committed space" );
988 if (verbose_low())
989 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
990 "below the finger, pushing it",
991 mr.start(), mr.end());
992
993 if (!region_stack_push(mr)) {
994 if (verbose_low())
995 gclog_or_tty->print_cr("[global] region stack has overflown.");
996 }
997 }
998 }
999
1000 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1001 // The object is not marked by the caller. We need to at least mark
1002 // it and maybe push it on the stack.
1003
1004 HeapWord* addr = (HeapWord*)p;
1005 if (!_nextMarkBitMap->isMarked(addr)) {
1006 // We definitely need to mark it, irrespective of whether we bail out
1007 // because we're done with marking.
1008 if (_nextMarkBitMap->parMark(addr)) {
1009 if (!concurrent_marking_in_progress() || !_should_gray_objects)
1010 // If we're done with concurrent marking and we're waiting for
1011 // remark, then we're not pushing anything on the stack.
1012 return;
1013
1014 // No OrderAccess::store_load() is needed. It is implicit in the
1015 // CAS done in parMark(addr) above
1016 HeapWord* finger = _finger;
1017
1018 if (addr < finger) {
1019 if (!mark_stack_push(oop(addr))) {
1020 if (verbose_low())
1021 gclog_or_tty->print_cr("[global] global stack overflow "
1022 "during parMark");
1023 }
1024 }
1025 }
1026 }
1027 }
1028
1029 class CMConcurrentMarkingTask: public AbstractGangTask {
1030 private:
1031 ConcurrentMark* _cm;
1032 ConcurrentMarkThread* _cmt;
1033
1034 public:
1035 void work(int worker_i) {
1036 guarantee( Thread::current()->is_ConcurrentGC_thread(),
1037 "this should only be done by a conc GC thread" );
1038
1039 double start_vtime = os::elapsedVTime();
1040
1041 ConcurrentGCThread::stsJoin();
1042
1043 guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
1044 CMTask* the_task = _cm->task(worker_i);
1045 the_task->start_co_tracker();
1046 the_task->record_start_time();
1047 if (!_cm->has_aborted()) {
1048 do {
1049 double start_vtime_sec = os::elapsedVTime();
1050 double start_time_sec = os::elapsedTime();
1051 the_task->do_marking_step(10.0);
1052 double end_time_sec = os::elapsedTime();
1053 double end_vtime_sec = os::elapsedVTime();
1054 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1055 double elapsed_time_sec = end_time_sec - start_time_sec;
1056 _cm->clear_has_overflown();
1057
1058 bool ret = _cm->do_yield_check(worker_i);
1059
1060 jlong sleep_time_ms;
1061 if (!_cm->has_aborted() && the_task->has_aborted()) {
1062 sleep_time_ms =
1063 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1064 ConcurrentGCThread::stsLeave();
1065 os::sleep(Thread::current(), sleep_time_ms, false);
1066 ConcurrentGCThread::stsJoin();
1067 }
1068 double end_time2_sec = os::elapsedTime();
1069 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1070
1071 the_task->update_co_tracker();
1072
1073 #if 0
1074 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1075 "overhead %1.4lf",
1076 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1077 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1078 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1079 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1080 #endif
1081 } while (!_cm->has_aborted() && the_task->has_aborted());
1082 }
1083 the_task->record_end_time();
1084 guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );
1085
1086 ConcurrentGCThread::stsLeave();
1087
1088 double end_vtime = os::elapsedVTime();
1089 the_task->update_co_tracker(true);
1090 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1091 }
1092
1093 CMConcurrentMarkingTask(ConcurrentMark* cm,
1094 ConcurrentMarkThread* cmt) :
1095 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1096
1097 ~CMConcurrentMarkingTask() { }
1098 };
1099
1100 void ConcurrentMark::markFromRoots() {
1101 // we might be tempted to assert that:
1102 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1103 // "inconsistent argument?");
1104 // However that wouldn't be right, because it's possible that
1105 // a safepoint is indeed in progress as a younger generation
1106 // stop-the-world GC happens even as we mark in this generation.
1107
1108 _restart_for_overflow = false;
1109
1110 set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
1111
1112 CMConcurrentMarkingTask markingTask(this, cmThread());
1113 if (parallel_marking_threads() > 0)
1114 _parallel_workers->run_task(&markingTask);
1115 else
1116 markingTask.work(0);
1117 print_stats();
1118 }
1119
1120 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1121 // world is stopped at this checkpoint
1122 assert(SafepointSynchronize::is_at_safepoint(),
1123 "world should be stopped");
1124 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1125
1126 // If a full collection has happened, we shouldn't do this.
1127 if (has_aborted()) {
1128 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1129 return;
1130 }
1131
1132 G1CollectorPolicy* g1p = g1h->g1_policy();
1133 g1p->record_concurrent_mark_remark_start();
1134
1135 double start = os::elapsedTime();
1136 GCOverheadReporter::recordSTWStart(start);
1137
1138 checkpointRootsFinalWork();
1139
1140 double mark_work_end = os::elapsedTime();
1141
1142 weakRefsWork(clear_all_soft_refs);
1143
1144 if (has_overflown()) {
1145 // Oops. We overflowed. Restart concurrent marking.
1146 _restart_for_overflow = true;
1147 // Clear the flag. We do not need it any more.
1148 clear_has_overflown();
1149 if (G1TraceMarkStackOverflow)
1150 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1151 } else {
1152 // We're done with marking.
1153 JavaThread::satb_mark_queue_set().set_active_all_threads(false);
1154 }
1155
1156 #if VERIFY_OBJS_PROCESSED
1157 _scan_obj_cl.objs_processed = 0;
1158 ThreadLocalObjQueue::objs_enqueued = 0;
1159 #endif
1160
1161 // Statistics
1162 double now = os::elapsedTime();
1163 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1164 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1165 _remark_times.add((now - start) * 1000.0);
1166
1167 GCOverheadReporter::recordSTWEnd(now);
1168 for (int i = 0; i < (int)_max_task_num; ++i)
1169 _tasks[i]->disable_co_tracker();
1170 _cleanup_co_tracker.enable();
1171 _cleanup_co_tracker.reset(cleanup_task_overhead());
1172 g1p->record_concurrent_mark_remark_end();
1173 }
1174
1175
1176 #define CARD_BM_TEST_MODE 0
1177
1178 class CalcLiveObjectsClosure: public HeapRegionClosure {
1179
1180 CMBitMapRO* _bm;
1181 ConcurrentMark* _cm;
1182 COTracker* _co_tracker;
1183 bool _changed;
1184 bool _yield;
1185 size_t _words_done;
1186 size_t _tot_live;
1187 size_t _tot_used;
1188 size_t _regions_done;
1189 double _start_vtime_sec;
1190
1191 BitMap* _region_bm;
1192 BitMap* _card_bm;
1193 intptr_t _bottom_card_num;
1194 bool _final;
1195
1196 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1197 for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1198 #if CARD_BM_TEST_MODE
1199 guarantee(_card_bm->at(i - _bottom_card_num),
1200 "Should already be set.");
1201 #else
1202 _card_bm->par_at_put(i - _bottom_card_num, 1);
1203 #endif
1204 }
1205 }
1206
1207 public:
1208 CalcLiveObjectsClosure(bool final,
1209 CMBitMapRO *bm, ConcurrentMark *cm,
1210 BitMap* region_bm, BitMap* card_bm,
1211 COTracker* co_tracker) :
1212 _bm(bm), _cm(cm), _changed(false), _yield(true),
1213 _words_done(0), _tot_live(0), _tot_used(0),
1214 _region_bm(region_bm), _card_bm(card_bm),
1215 _final(final), _co_tracker(co_tracker),
1216 _regions_done(0), _start_vtime_sec(0.0)
1217 {
1218 _bottom_card_num =
1219 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1220 CardTableModRefBS::card_shift);
1221 }
1222
1223 bool doHeapRegion(HeapRegion* hr) {
1224 if (_co_tracker != NULL)
1225 _co_tracker->update();
1226
1227 if (!_final && _regions_done == 0)
1228 _start_vtime_sec = os::elapsedVTime();
1229
1230 if (hr->continuesHumongous()) return false;
1231
1232 HeapWord* nextTop = hr->next_top_at_mark_start();
1233 HeapWord* start = hr->top_at_conc_mark_count();
1234 assert(hr->bottom() <= start && start <= hr->end() &&
1235 hr->bottom() <= nextTop && nextTop <= hr->end() &&
1236 start <= nextTop,
1237 "Preconditions.");
1238 // Otherwise, record the number of words we'll examine.
1239 size_t words_done = (nextTop - start);
1240 // Find the first marked object at or after "start".
1241 start = _bm->getNextMarkedWordAddress(start, nextTop);
1242 size_t marked_bytes = 0;
1243
1244 // Below, the term "card num" means the result of shifting an address
1245 // by the card shift -- address 0 corresponds to card number 0. One
1246 // must subtract the card num of the bottom of the heap to obtain a
1247 // card table index.
1248 // The first card num of the sequence of live cards currently being
1249 // constructed. -1 ==> no sequence.
1250 intptr_t start_card_num = -1;
1251 // The last card num of the sequence of live cards currently being
1252 // constructed. -1 ==> no sequence.
1253 intptr_t last_card_num = -1;
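// Illustrative example (assuming 512-byte cards, i.e. card_shift == 9): an
// object starting at address 0x20000 has card num 0x20000 >> 9 = 0x100; if
// the bottom of the heap is at 0x10000 (card num 0x80), its card table
// index is 0x100 - 0x80 = 0x80.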
1254
1255 while (start < nextTop) {
1256 if (_yield && _cm->do_yield_check()) {
1257 // We yielded. It might be for a full collection, in which case
1258 // all bets are off; terminate the traversal.
1259 if (_cm->has_aborted()) {
1260 _changed = false;
1261 return true;
1262 } else {
1263 // Otherwise, it might be a collection pause, and the region
1264 // we're looking at might be in the collection set. We'll
1265 // abandon this region.
1266 return false;
1267 }
1268 }
1269 oop obj = oop(start);
1270 int obj_sz = obj->size();
1271 // The card num of the start of the current object.
1272 intptr_t obj_card_num =
1273 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1274
1275 HeapWord* obj_last = start + obj_sz - 1;
1276 intptr_t obj_last_card_num =
1277 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1278
1279 if (obj_card_num != last_card_num) {
1280 if (start_card_num == -1) {
1281 assert(last_card_num == -1, "Both or neither.");
1282 start_card_num = obj_card_num;
1283 } else {
1284 assert(last_card_num != -1, "Both or neither.");
1285 assert(obj_card_num >= last_card_num, "Inv");
1286 if ((obj_card_num - last_card_num) > 1) {
1287 // Mark the last run, and start a new one.
1288 mark_card_num_range(start_card_num, last_card_num);
1289 start_card_num = obj_card_num;
1290 }
1291 }
1292 #if CARD_BM_TEST_MODE
1293 /*
1294 gclog_or_tty->print_cr("Setting bits from %d/%d.",
1295 obj_card_num - _bottom_card_num,
1296 obj_last_card_num - _bottom_card_num);
1297 */
1298 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1299 _card_bm->par_at_put(j - _bottom_card_num, 1);
1300 }
1301 #endif
1302 }
1303 // In any case, we set the last card num.
1304 last_card_num = obj_last_card_num;
1305
1306 marked_bytes += obj_sz * HeapWordSize;
1307 // Find the next marked object after this one.
1308 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1309 _changed = true;
1310 }
1311 // Handle the last range, if any.
1312 if (start_card_num != -1)
1313 mark_card_num_range(start_card_num, last_card_num);
1314 if (_final) {
1315 // Mark the allocated-since-marking portion...
1316 HeapWord* tp = hr->top();
1317 if (nextTop < tp) {
1318 start_card_num =
1319 intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1320 last_card_num =
1321 intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1322 mark_card_num_range(start_card_num, last_card_num);
1323 // This definitely means the region has live objects.
1324 _region_bm->par_at_put(hr->hrs_index(), 1);
1325 }
1326 }
1327
1328 hr->add_to_marked_bytes(marked_bytes);
1329 // Update the live region bitmap.
1330 if (marked_bytes > 0) {
1331 _region_bm->par_at_put(hr->hrs_index(), 1);
1332 }
1333 hr->set_top_at_conc_mark_count(nextTop);
1334 _tot_live += hr->next_live_bytes();
1335 _tot_used += hr->used();
1336 _words_done = words_done;
1337
1338 if (!_final) {
1339 ++_regions_done;
1340 if (_regions_done % 10 == 0) {
1341 double end_vtime_sec = os::elapsedVTime();
1342 double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1343 if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1344 jlong sleep_time_ms =
1345 (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1346 #if 0
1347 gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, "
1348 "overhead %1.4lf",
1349 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1350 _co_tracker->concOverhead(os::elapsedTime()));
1351 #endif
1352 os::sleep(Thread::current(), sleep_time_ms, false);
1353 _start_vtime_sec = end_vtime_sec;
1354 }
1355 }
1356 }
1357
1358 return false;
1359 }
1360
1361 bool changed() { return _changed; }
1362 void reset() { _changed = false; _words_done = 0; }
1363 void no_yield() { _yield = false; }
1364 size_t words_done() { return _words_done; }
1365 size_t tot_live() { return _tot_live; }
1366 size_t tot_used() { return _tot_used; }
1367 };
1368
1369
1370 void ConcurrentMark::calcDesiredRegions() {
1371 guarantee( _cleanup_co_tracker.enabled(), "invariant" );
1372 _cleanup_co_tracker.start();
1373
1374 _region_bm.clear();
1375 _card_bm.clear();
1376 CalcLiveObjectsClosure calccl(false /*final*/,
1377 nextMarkBitMap(), this,
1378 &_region_bm, &_card_bm,
1379 &_cleanup_co_tracker);
1380 G1CollectedHeap *g1h = G1CollectedHeap::heap();
1381 g1h->heap_region_iterate(&calccl);
1382
1383 do {
1384 calccl.reset();
1385 g1h->heap_region_iterate(&calccl);
1386 } while (calccl.changed());
1387
1388 _cleanup_co_tracker.update(true);
1389 }
1390
1391 class G1ParFinalCountTask: public AbstractGangTask {
1392 protected:
1393 G1CollectedHeap* _g1h;
1394 CMBitMap* _bm;
1395 size_t _n_workers;
1396 size_t *_live_bytes;
1397 size_t *_used_bytes;
1398 BitMap* _region_bm;
1399 BitMap* _card_bm;
1400 public:
1401 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1402 BitMap* region_bm, BitMap* card_bm) :
1403 AbstractGangTask("G1 final counting"), _g1h(g1h),
1404 _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1405 {
1406 if (ParallelGCThreads > 0)
1407 _n_workers = _g1h->workers()->total_workers();
1408 else
1409 _n_workers = 1;
1410 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1411 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1412 }
1413
1414 ~G1ParFinalCountTask() {
1415 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1416 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1417 }
1418
1419 void work(int i) {
1420 CalcLiveObjectsClosure calccl(true /*final*/,
1421 _bm, _g1h->concurrent_mark(),
1422 _region_bm, _card_bm,
1423 NULL /* CO tracker */);
1424 calccl.no_yield();
1425 if (ParallelGCThreads > 0) {
1426 _g1h->heap_region_par_iterate_chunked(&calccl, i, 1);
1427 } else {
1428 _g1h->heap_region_iterate(&calccl);
1429 }
1430 assert(calccl.complete(), "Shouldn't have yielded!");
1431
1432 guarantee( (size_t)i < _n_workers, "invariant" );
1433 _live_bytes[i] = calccl.tot_live();
1434 _used_bytes[i] = calccl.tot_used();
1435 }
1436 size_t live_bytes() {
1437 size_t live_bytes = 0;
1438 for (size_t i = 0; i < _n_workers; ++i)
1439 live_bytes += _live_bytes[i];
1440 return live_bytes;
1441 }
1442 size_t used_bytes() {
1443 size_t used_bytes = 0;
1444 for (size_t i = 0; i < _n_workers; ++i)
1445 used_bytes += _used_bytes[i];
1446 return used_bytes;
1447 }
1448 };
1449
1450 class G1ParNoteEndTask;
1451
1452 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1453 G1CollectedHeap* _g1;
1454 int _worker_num;
1455 size_t _max_live_bytes;
1456 size_t _regions_claimed;
1457 size_t _freed_bytes;
1458 size_t _cleared_h_regions;
1459 size_t _freed_regions;
1460 UncleanRegionList* _unclean_region_list;
1461 double _claimed_region_time;
1462 double _max_region_time;
1463
1464 public:
1465 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1466 UncleanRegionList* list,
1467 int worker_num);
1468 size_t freed_bytes() { return _freed_bytes; }
1469 size_t cleared_h_regions() { return _cleared_h_regions; }
1470 size_t freed_regions() { return _freed_regions; }
1471 UncleanRegionList* unclean_region_list() {
1472 return _unclean_region_list;
1473 }
1474
1475 bool doHeapRegion(HeapRegion *r);
1476
1477 size_t max_live_bytes() { return _max_live_bytes; }
1478 size_t regions_claimed() { return _regions_claimed; }
1479 double claimed_region_time_sec() { return _claimed_region_time; }
1480 double max_region_time_sec() { return _max_region_time; }
1481 };
1482
1483 class G1ParNoteEndTask: public AbstractGangTask {
1484 friend class G1NoteEndOfConcMarkClosure;
1485 protected:
1486 G1CollectedHeap* _g1h;
1487 size_t _max_live_bytes;
1488 size_t _freed_bytes;
1489 ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
1490 public:
1491 G1ParNoteEndTask(G1CollectedHeap* g1h,
1492 ConcurrentMark::ParCleanupThreadState**
1493 par_cleanup_thread_state) :
1494 AbstractGangTask("G1 note end"), _g1h(g1h),
1495 _max_live_bytes(0), _freed_bytes(0),
1496 _par_cleanup_thread_state(par_cleanup_thread_state)
1497 {}
1498
1499 void work(int i) {
1500 double start = os::elapsedTime();
1501 G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
1502 &_par_cleanup_thread_state[i]->list,
1503 i);
1504 if (ParallelGCThreads > 0) {
1505 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i, 2);
1506 } else {
1507 _g1h->heap_region_iterate(&g1_note_end);
1508 }
1509 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1510
1511 // Now finish up freeing the current thread's regions.
1512 _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
1513 g1_note_end.cleared_h_regions(),
1514 0, NULL);
1515 {
1516 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1517 _max_live_bytes += g1_note_end.max_live_bytes();
1518 _freed_bytes += g1_note_end.freed_bytes();
1519 }
1520 double end = os::elapsedTime();
1521 if (G1PrintParCleanupStats) {
1522 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1523 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1524 i, start, end, (end-start)*1000.0,
1525 g1_note_end.regions_claimed(),
1526 g1_note_end.claimed_region_time_sec()*1000.0,
1527 g1_note_end.max_region_time_sec()*1000.0);
1528 }
1529 }
1530 size_t max_live_bytes() { return _max_live_bytes; }
1531 size_t freed_bytes() { return _freed_bytes; }
1532 };
1533
1534 class G1ParScrubRemSetTask: public AbstractGangTask {
1535 protected:
1536 G1RemSet* _g1rs;
1537 BitMap* _region_bm;
1538 BitMap* _card_bm;
1539 public:
1540 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1541 BitMap* region_bm, BitMap* card_bm) :
1542 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1543 _region_bm(region_bm), _card_bm(card_bm)
1544 {}
1545
1546 void work(int i) {
1547 if (ParallelGCThreads > 0) {
1548 _g1rs->scrub_par(_region_bm, _card_bm, i, 3);
1549 } else {
1550 _g1rs->scrub(_region_bm, _card_bm);
1551 }
1552 }
1553
1554 };
1555
1556 G1NoteEndOfConcMarkClosure::
1557 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1558 UncleanRegionList* list,
1559 int worker_num)
1560 : _g1(g1), _worker_num(worker_num),
1561 _max_live_bytes(0), _regions_claimed(0),
1562 _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
1563 _claimed_region_time(0.0), _max_region_time(0.0),
1564 _unclean_region_list(list)
1565 {}
1566
1567 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
1568 // We use a claim value of zero here because all regions
1569 // were claimed with value 1 in the FinalCount task.
1570 r->reset_gc_time_stamp();
1571 if (!r->continuesHumongous()) {
1572 double start = os::elapsedTime();
1573 _regions_claimed++;
1574 r->note_end_of_marking();
1575 _max_live_bytes += r->max_live_bytes();
1576 _g1->free_region_if_totally_empty_work(r,
1577 _freed_bytes,
1578 _cleared_h_regions,
1579 _freed_regions,
1580 _unclean_region_list,
1581 true /*par*/);
1582 double region_time = (os::elapsedTime() - start);
1583 _claimed_region_time += region_time;
1584 if (region_time > _max_region_time) _max_region_time = region_time;
1585 }
1586 return false;
1587 }
1588
1589 void ConcurrentMark::cleanup() {
1590 // world is stopped at this checkpoint
1591 assert(SafepointSynchronize::is_at_safepoint(),
1592 "world should be stopped");
1593 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1594
1595 // If a full collection has happened, we shouldn't do this.
1596 if (has_aborted()) {
1597 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1598 return;
1599 }
1600
1601 _cleanup_co_tracker.disable();
1602
1603 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1604 g1p->record_concurrent_mark_cleanup_start();
1605
1606 double start = os::elapsedTime();
1607 GCOverheadReporter::recordSTWStart(start);
1608
1609 // Do counting once more with the world stopped for good measure.
1610 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1611 &_region_bm, &_card_bm);
1612 if (ParallelGCThreads > 0) {
1613 int n_workers = g1h->workers()->total_workers();
1614 g1h->set_par_threads(n_workers);
1615 g1h->workers()->run_task(&g1_par_count_task);
1616 g1h->set_par_threads(0);
1617 } else {
1618 g1_par_count_task.work(0);
1619 }
1620
1621 size_t known_garbage_bytes =
1622 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1623 #if 0
1624 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1625 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1626 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1627 (double) known_garbage_bytes / (double) (1024 * 1024));
1628 #endif // 0
1629 g1p->set_known_garbage_bytes(known_garbage_bytes);
1630
1631 size_t start_used_bytes = g1h->used();
1632 _at_least_one_mark_complete = true;
1633 g1h->set_marking_complete();
1634
1635 double count_end = os::elapsedTime();
1636 double this_final_counting_time = (count_end - start);
1637 if (G1PrintParCleanupStats) {
1638 gclog_or_tty->print_cr("Cleanup:");
1639 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1640 this_final_counting_time*1000.0);
1641 }
1642 _total_counting_time += this_final_counting_time;
1643
1644 // Install newly created mark bitMap as "prev".
1645 swapMarkBitMaps();
1646
1647 g1h->reset_gc_time_stamp();
1648
1649 // Note end of marking in all heap regions.
1650 double note_end_start = os::elapsedTime();
1651 G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1652 if (ParallelGCThreads > 0) {
1653 int n_workers = g1h->workers()->total_workers();
1654 g1h->set_par_threads(n_workers);
1655 g1h->workers()->run_task(&g1_par_note_end_task);
1656 g1h->set_par_threads(0);
1657 } else {
1658 g1_par_note_end_task.work(0);
1659 }
1660 g1h->set_unclean_regions_coming(true);
1661 double note_end_end = os::elapsedTime();
1662 // Tell the mutators that there might be unclean regions coming...
1663 if (G1PrintParCleanupStats) {
1664 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1665 (note_end_end - note_end_start)*1000.0);
1666 }
1667
1668 // Now we "scrub" remembered sets. Note that we must do this before the
1669 // call below, since it affects the metric by which we sort the heap
1670 // regions.
1671 if (G1ScrubRemSets) {
1672 double rs_scrub_start = os::elapsedTime();
1673 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1674 if (ParallelGCThreads > 0) {
1675 int n_workers = g1h->workers()->total_workers();
1676 g1h->set_par_threads(n_workers);
1677 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1678 g1h->set_par_threads(0);
1679 } else {
1680 g1_par_scrub_rs_task.work(0);
1681 }
1682
1683 double rs_scrub_end = os::elapsedTime();
1684 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1685 _total_rs_scrub_time += this_rs_scrub_time;
1686 }
1687
1688 // this will also free any regions totally full of garbage objects,
1689 // and sort the regions.
1690 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1691 g1_par_note_end_task.freed_bytes(),
1692 g1_par_note_end_task.max_live_bytes());
1693
1694 // Statistics.
1695 double end = os::elapsedTime();
1696 _cleanup_times.add((end - start) * 1000.0);
1697 GCOverheadReporter::recordSTWEnd(end);
1698
1699 // G1CollectedHeap::heap()->print();
1700 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1701 // G1CollectedHeap::heap()->get_gc_time_stamp());
1702
1703 if (PrintGC || PrintGCDetails) {
1704 g1h->print_size_transition(gclog_or_tty,
1705 start_used_bytes,
1706 g1h->used(),
1707 g1h->capacity());
1708 }
1709
1710 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1711 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1712
1713 // We need to make this count as a "collection" so that any collection pause that
1714 // races with it goes around and waits for completeCleanup to finish.
1715 g1h->increment_total_collections();
1716
1717 #ifndef PRODUCT
1718 if (G1VerifyConcMark) {
1719 G1CollectedHeap::heap()->prepare_for_verify();
1720 G1CollectedHeap::heap()->verify(true,false);
1721 }
1722 #endif
1723 }
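
// The block structure above -- "if (ParallelGCThreads > 0) run the task on the
// work gang, else call work(0) inline" -- is repeated for the counting, the
// note-end and the RS-scrubbing tasks. A minimal standalone sketch of that
// dispatch, using std::thread instead of HotSpot's WorkGang (all names below
// are illustrative, not HotSpot APIs):

#include <thread>
#include <vector>

template <typename Task>
void run_parallel_or_serial(Task& task, int n_workers) {
  if (n_workers > 0) {
    std::vector<std::thread> gang;
    for (int i = 0; i < n_workers; ++i) {
      gang.push_back(std::thread([&task, i] { task.work(i); }));  // parallel
    }
    for (size_t t = 0; t < gang.size(); ++t) {
      gang[t].join();
    }
  } else {
    task.work(0);                                                 // serial
  }
}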
1724
1725 void ConcurrentMark::completeCleanup() {
1726 // A full collection intervened.
1727 if (has_aborted()) return;
1728
1729 int first = 0;
1730 int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1731 for (int t = 0; t < last; t++) {
1732 UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1733 assert(list->well_formed(), "Inv");
1734 HeapRegion* hd = list->hd();
1735 while (hd != NULL) {
1736 // Clear the region's remembered set before putting it back on the unclean list.
1737 hd->rem_set()->clear();
1738 HeapRegion* next_hd = hd->next_from_unclean_list();
1739 (void)list->pop();
1740 guarantee(list->hd() == next_hd, "how not?");
1741 _g1h->put_region_on_unclean_list(hd);
1742 if (!hd->isHumongous()) {
1743 // Increment the _free_regions count by one.
1744 _g1h->finish_free_region_work(0, 0, 1, NULL);
1745 }
1746 hd = list->hd();
1747 guarantee(hd == next_hd, "how not?");
1748 }
1749 }
1750 }
1751
1752
1753 class G1CMIsAliveClosure: public BoolObjectClosure {
1754 G1CollectedHeap* _g1;
1755 public:
1756 G1CMIsAliveClosure(G1CollectedHeap* g1) :
1757 _g1(g1)
1758 {}
1759
1760 void do_object(oop obj) {
1761 assert(false, "not to be invoked");
1762 }
1763 bool do_object_b(oop obj) {
1764 HeapWord* addr = (HeapWord*)obj;
1765 return addr != NULL &&
1766 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1767 }
1768 };
1769
1770 class G1CMKeepAliveClosure: public OopClosure {
1771 G1CollectedHeap* _g1;
1772 ConcurrentMark* _cm;
1773 CMBitMap* _bitMap;
1774 public:
1775 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1776 CMBitMap* bitMap) :
1777 _g1(g1), _cm(cm),
1778 _bitMap(bitMap) {}
1779
1780 void do_oop(narrowOop* p) {
1781 guarantee(false, "NYI");
1782 }
1783
1784 void do_oop(oop* p) {
1785 oop thisOop = *p;
1786 HeapWord* addr = (HeapWord*)thisOop;
1787 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1788 _bitMap->mark(addr);
1789 _cm->mark_stack_push(thisOop);
1790 }
1791 }
1792 };
1793
1794 class G1CMDrainMarkingStackClosure: public VoidClosure {
1795 CMMarkStack* _markStack;
1796 CMBitMap* _bitMap;
1797 G1CMKeepAliveClosure* _oopClosure;
1798 public:
1799 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1800 G1CMKeepAliveClosure* oopClosure) :
1801 _bitMap(bitMap),
1802 _markStack(markStack),
1803 _oopClosure(oopClosure)
1804 {}
1805
1806 void do_void() {
1807 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1808 }
1809 };
1810
1811 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1812 ResourceMark rm;
1813 HandleMark hm;
1814 ReferencePolicy* soft_ref_policy;
1815
1816 // Process weak references.
1817 if (clear_all_soft_refs) {
1818 soft_ref_policy = new AlwaysClearPolicy();
1819 } else {
1820 #ifdef COMPILER2
1821 soft_ref_policy = new LRUMaxHeapPolicy();
1822 #else
1823 soft_ref_policy = new LRUCurrentHeapPolicy();
1824 #endif
1825 }
1826 assert(_markStack.isEmpty(), "mark stack should be empty");
1827
1828 G1CollectedHeap* g1 = G1CollectedHeap::heap();
1829 G1CMIsAliveClosure g1IsAliveClosure(g1);
1830
1831 G1CMKeepAliveClosure g1KeepAliveClosure(g1, this, nextMarkBitMap());
1832 G1CMDrainMarkingStackClosure
1833 g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1834 &g1KeepAliveClosure);
1835
1836 // XXXYYY Also: copy the parallel ref processing code from CMS.
1837 ReferenceProcessor* rp = g1->ref_processor();
1838 rp->process_discovered_references(soft_ref_policy,
1839 &g1IsAliveClosure,
1840 &g1KeepAliveClosure,
1841 &g1DrainMarkingStackClosure,
1842 NULL);
1843 assert(_markStack.overflow() || _markStack.isEmpty(),
1844 "mark stack should be empty (unless it overflowed)");
1845 if (_markStack.overflow()) {
1846 set_has_overflown();
1847 }
1848
1849 rp->enqueue_discovered_references();
1850 rp->verify_no_references_recorded();
1851 assert(!rp->discovery_enabled(), "should have been disabled");
1852
1853 // Now clean up stale oops in SymbolTable and StringTable
1854 SymbolTable::unlink(&g1IsAliveClosure);
1855 StringTable::unlink(&g1IsAliveClosure);
1856 }
1857
1858 void ConcurrentMark::swapMarkBitMaps() {
1859 CMBitMapRO* temp = _prevMarkBitMap;
1860 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
1861 _nextMarkBitMap = (CMBitMap*) temp;
1862 }
1863
1864 class CMRemarkTask: public AbstractGangTask {
1865 private:
1866 ConcurrentMark *_cm;
1867
1868 public:
1869 void work(int worker_i) {
1870 // Since all available tasks are actually started, we should
1871 // only proceed if we're supposed to be active.
1872 if ((size_t)worker_i < _cm->active_tasks()) {
1873 CMTask* task = _cm->task(worker_i);
1874 task->record_start_time();
1875 do {
1876 task->do_marking_step(1000000000.0 /* something very large */);
1877 } while (task->has_aborted() && !_cm->has_overflown());
1878 // If we overflow, then we do not want to restart. We instead
1879 // want to abort remark and do concurrent marking again.
1880 task->record_end_time();
1881 }
1882 }
1883
1884 CMRemarkTask(ConcurrentMark* cm) :
1885 AbstractGangTask("Par Remark"), _cm(cm) { }
1886 };
1887
1888 void ConcurrentMark::checkpointRootsFinalWork() {
1889 ResourceMark rm;
1890 HandleMark hm;
1891 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1892
1893 g1h->ensure_parsability(false);
1894
1895 if (ParallelGCThreads > 0) {
1896 g1h->change_strong_roots_parity();
1897 // this is remark, so we'll use up all available threads
1898 int active_workers = ParallelGCThreads;
1899 set_phase(active_workers, false);
1900
1901 CMRemarkTask remarkTask(this);
1902 // We will start all available threads, even if we decide that the
1903 // active_workers will be fewer. The extra ones will just bail out
1904 // immediately.
1905 int n_workers = g1h->workers()->total_workers();
1906 g1h->set_par_threads(n_workers);
1907 g1h->workers()->run_task(&remarkTask);
1908 g1h->set_par_threads(0);
1909
1910 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1911 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1912 } else {
1913 g1h->change_strong_roots_parity();
1914 // this is remark, so we'll use up all available threads
1915 int active_workers = 1;
1916 set_phase(active_workers, false);
1917
1918 CMRemarkTask remarkTask(this);
1919 // We will start all available threads, even if we decide that the
1920 // active_workers will be fewer. The extra ones will just bail out
1921 // immediately.
1922 remarkTask.work(0);
1923
1924 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1925 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1926 }
1927
1928 print_stats();
1929
1930 if (!restart_for_overflow())
1931 set_non_marking_state();
1932
1933 #if VERIFY_OBJS_PROCESSED
1934 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1935 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1936 _scan_obj_cl.objs_processed,
1937 ThreadLocalObjQueue::objs_enqueued);
1938 guarantee(_scan_obj_cl.objs_processed ==
1939 ThreadLocalObjQueue::objs_enqueued,
1940 "Different number of objs processed and enqueued.");
1941 }
1942 #endif
1943 }
1944
1945 class ReachablePrinterOopClosure: public OopClosure {
1946 private:
1947 G1CollectedHeap* _g1h;
1948 CMBitMapRO* _bitmap;
1949 outputStream* _out;
1950
1951 public:
1952 ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
1953 _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }
1954
1955 void do_oop(narrowOop* p) {
1956 guarantee(false, "NYI");
1957 }
1958
1959 void do_oop(oop* p) {
1960 oop obj = *p;
1961 const char* str = NULL;
1962 const char* str2 = "";
1963
1964 if (!_g1h->is_in_g1_reserved(obj))
1965 str = "outside G1 reserved";
1966 else {
1967 HeapRegion* hr = _g1h->heap_region_containing(obj);
1968 guarantee( hr != NULL, "invariant" );
1969 if (hr->obj_allocated_since_prev_marking(obj)) {
1970 str = "over TAMS";
1971 if (_bitmap->isMarked((HeapWord*) obj))
1972 str2 = " AND MARKED";
1973 } else if (_bitmap->isMarked((HeapWord*) obj))
1974 str = "marked";
1975 else
1976 str = "#### NOT MARKED ####";
1977 }
1978
1979 _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
1980 p, (void*) obj, str, str2);
1981 }
1982 };
1983
1984 class ReachablePrinterClosure: public BitMapClosure {
1985 private:
1986 CMBitMapRO* _bitmap;
1987 outputStream* _out;
1988
1989 public:
1990 ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
1991 _bitmap(bitmap), _out(out) { }
1992
1993 bool do_bit(size_t offset) {
1994 HeapWord* addr = _bitmap->offsetToHeapWord(offset);
1995 ReachablePrinterOopClosure oopCl(_bitmap, _out);
1996
1997 _out->print_cr(" obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
1998 oop(addr)->oop_iterate(&oopCl);
1999 _out->print_cr("");
2000
2001 return true;
2002 }
2003 };
2004
2005 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
2006 private:
2007 CMBitMapRO* _bitmap;
2008 outputStream* _out;
2009
2010 public:
2011 void do_object(oop o) {
2012 ReachablePrinterOopClosure oopCl(_bitmap, _out);
2013
2014 _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o);
2015 o->oop_iterate(&oopCl);
2016 _out->print_cr("");
2017 }
2018
2019 ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
2020 _bitmap(bitmap), _out(out) { }
2021 };
2022
2023 class RegionReachablePrinterClosure : public HeapRegionClosure {
2024 private:
2025 CMBitMapRO* _bitmap;
2026 outputStream* _out;
2027
2028 public:
2029 bool doHeapRegion(HeapRegion* hr) {
2030 HeapWord* b = hr->bottom();
2031 HeapWord* e = hr->end();
2032 HeapWord* t = hr->top();
2033 HeapWord* p = hr->prev_top_at_mark_start();
2034 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2035 "PTAMS: "PTR_FORMAT, b, e, t, p);
2036 _out->print_cr("");
2037
2038 ObjInRegionReachablePrinterClosure ocl(_bitmap, _out);
2039 hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
2040
2041 return false;
2042 }
2043
2044 RegionReachablePrinterClosure(CMBitMapRO* bitmap,
2045 outputStream* out) :
2046 _bitmap(bitmap), _out(out) { }
2047 };
2048
2049 void ConcurrentMark::print_prev_bitmap_reachable() {
2050 outputStream* out = gclog_or_tty;
2051
2052 #if SEND_HEAP_DUMP_TO_FILE
2053 guarantee(heap_dump_file == NULL, "Protocol");
2054 char fn_buf[100];
2055 sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id());
2056 heap_dump_file = fopen(fn_buf, "w");
2057 fileStream fstream(heap_dump_file);
2058 out = &fstream;
2059 #endif // SEND_HEAP_DUMP_TO_FILE
2060
2061 RegionReachablePrinterClosure rcl(_prevMarkBitMap, out);
2062 out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP");
2063 _g1h->heap_region_iterate(&rcl);
2064 out->print_cr("");
2065
2066 ReachablePrinterClosure cl(_prevMarkBitMap, out);
2067 out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP");
2068 _prevMarkBitMap->iterate(&cl);
2069 out->print_cr("");
2070
2071 #if SEND_HEAP_DUMP_TO_FILE
2072 fclose(heap_dump_file);
2073 heap_dump_file = NULL;
2074 #endif // SEND_HEAP_DUMP_TO_FILE
2075 }
2076
2077 // This note is for drainAllSATBBuffers and the code in between.
2078 // In the future we could reuse a task to do this work during an
2079 // evacuation pause (since now tasks are not active and can be claimed
2080 // during an evacuation pause). This was a late change to the code and
2081 // is currently not being taken advantage of.
2082
2083 class CMGlobalObjectClosure : public ObjectClosure {
2084 private:
2085 ConcurrentMark* _cm;
2086
2087 public:
2088 void do_object(oop obj) {
2089 _cm->deal_with_reference(obj);
2090 }
2091
2092 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2093 };
2094
2095 void ConcurrentMark::deal_with_reference(oop obj) {
2096 if (verbose_high())
2097 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2098 (void*) obj);
2099
2100
2101 HeapWord* objAddr = (HeapWord*) obj;
2102 if (_g1h->is_in_g1_reserved(objAddr)) {
2103 tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2104 HeapRegion* hr = _g1h->heap_region_containing(obj);
2105 if (_g1h->is_obj_ill(obj, hr)) {
2106 if (verbose_high())
2107 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2108 "marked", (void*) obj);
2109
2110 // we need to mark it first
2111 if (_nextMarkBitMap->parMark(objAddr)) {
2112 // No OrderAccess::store_load() is needed. It is implicit in the
2113 // CAS done in parMark(objAddr) above
2114 HeapWord* finger = _finger;
2115 if (objAddr < finger) {
2116 if (verbose_high())
2117 gclog_or_tty->print_cr("[global] below the global finger "
2118 "("PTR_FORMAT"), pushing it", finger);
2119 if (!mark_stack_push(obj)) {
2120 if (verbose_low())
2121 gclog_or_tty->print_cr("[global] global stack overflow during "
2122 "deal_with_reference");
2123 }
2124 }
2125 }
2126 }
2127 }
2128 }
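
// A minimal standalone sketch of the grey-object protocol above: win a CAS on
// the mark bitmap first, then push only if the object lies below the finger,
// since anything at or above the finger will still be visited by the bitmap
// scan. std::atomic stands in for parMark(), object indices for addresses, and
// the stack here is deliberately not thread-safe -- illustration only.

#include <atomic>
#include <cstddef>
#include <vector>

// fetch_or plays the role of the parMark() CAS: it returns the previous bits,
// so exactly one caller observes the bit going from 0 to 1.
inline bool par_mark_bit(std::atomic<unsigned>* bits, size_t idx) {
  unsigned mask = 1u << (idx % 32);
  return (bits[idx / 32].fetch_or(mask) & mask) == 0;
}

inline void grey_object(size_t obj_idx,
                        const std::atomic<size_t>& finger,
                        std::atomic<unsigned>* bits,
                        std::vector<size_t>& mark_stack) {
  if (par_mark_bit(bits, obj_idx) && obj_idx < finger.load()) {
    mark_stack.push_back(obj_idx);   // below the finger: needs an explicit push
  }
  // at or above the finger: the bitmap iteration will reach it anyway
}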
2129
2130 void ConcurrentMark::drainAllSATBBuffers() {
2131 CMGlobalObjectClosure oc(this);
2132 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2133 satb_mq_set.set_closure(&oc);
2134
2135 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2136 if (verbose_medium())
2137 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2138 }
2139
2140 // no need to check whether we should do this, as this is only
2141 // called during an evacuation pause
2142 satb_mq_set.iterate_closure_all_threads();
2143
2144 satb_mq_set.set_closure(NULL);
2145 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
2146 }
2147
2148 void ConcurrentMark::markPrev(oop p) {
2149 // Note we are overriding the read-only view of the prev map here, via
2150 // the cast.
2151 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2152 }
2153
2154 void ConcurrentMark::clear(oop p) {
2155 assert(p != NULL && p->is_oop(), "expected an oop");
2156 HeapWord* addr = (HeapWord*)p;
2157 assert(addr >= _nextMarkBitMap->startWord() &&
2158 addr < _nextMarkBitMap->endWord(), "in a region");
2159
2160 _nextMarkBitMap->clear(addr);
2161 }
2162
2163 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2164 // Note we are overriding the read-only view of the prev map here, via
2165 // the cast.
2166 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2167 _nextMarkBitMap->clearRange(mr);
2168 }
2169
2170 HeapRegion*
2171 ConcurrentMark::claim_region(int task_num) {
2172 // "checkpoint" the finger
2173 HeapWord* finger = _finger;
2174
2175 // _heap_end will not change underneath our feet; it only changes at
2176 // yield points.
2177 while (finger < _heap_end) {
2178 tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
2179
2180 // is the gap between reading the finger and doing the CAS too long?
2181
2182 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2183 HeapWord* bottom = curr_region->bottom();
2184 HeapWord* end = curr_region->end();
2185 HeapWord* limit = curr_region->next_top_at_mark_start();
2186
2187 if (verbose_low())
2188 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2189 "["PTR_FORMAT", "PTR_FORMAT"), "
2190 "limit = "PTR_FORMAT,
2191 task_num, curr_region, bottom, end, limit);
2192
2193 HeapWord* res =
2194 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2195 if (res == finger) {
2196 // we succeeded
2197
2198 // notice that _finger == end cannot be guaranteed here, since
2199 // someone else might have moved the finger even further
2200 guarantee( _finger >= end, "the finger should have moved forward" );
2201
2202 if (verbose_low())
2203 gclog_or_tty->print_cr("[%d] we were successful with region = "
2204 PTR_FORMAT, task_num, curr_region);
2205
2206 if (limit > bottom) {
2207 if (verbose_low())
2208 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2209 "returning it ", task_num, curr_region);
2210 return curr_region;
2211 } else {
2212 tmp_guarantee_CM( limit == bottom,
2213 "the region limit should be at bottom" );
2214 if (verbose_low())
2215 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2216 "returning NULL", task_num, curr_region);
2217 // we return NULL and the caller should try calling
2218 // claim_region() again.
2219 return NULL;
2220 }
2221 } else {
2222 guarantee( _finger > finger, "the finger should have moved forward" );
2223 if (verbose_low())
2224 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2225 "global finger = "PTR_FORMAT", "
2226 "our finger = "PTR_FORMAT,
2227 task_num, _finger, finger);
2228
2229 // read it again
2230 finger = _finger;
2231 }
2232 }
2233
2234 return NULL;
2235 }
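
// claim_region() hands out regions by advancing the shared finger with a CAS.
// A minimal sketch of the same idea over region indices: std::atomic replaces
// Atomic::cmpxchg_ptr, and unlike the real code (which advances a HeapWord*
// finger by whole regions and may return NULL for empty regions) every claim
// here succeeds for exactly one caller.

#include <atomic>
#include <cstddef>

inline size_t claim_next_region(std::atomic<size_t>& finger, size_t num_regions) {
  size_t cur = finger.load();
  while (cur < num_regions) {
    if (finger.compare_exchange_weak(cur, cur + 1)) {
      return cur;            // we own region 'cur'
    }
    // on failure compare_exchange_weak reloads 'cur'; just retry
  }
  return num_regions;        // heap exhausted, nothing left to claim
}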
2236
2237 void ConcurrentMark::oops_do(OopClosure* cl) {
2238 if (_markStack.size() > 0 && verbose_low())
2239 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2240 "size = %d", _markStack.size());
2241 // we first iterate over the contents of the mark stack...
2242 _markStack.oops_do(cl);
2243
2244 for (int i = 0; i < (int)_max_task_num; ++i) {
2245 OopTaskQueue* queue = _task_queues->queue((int)i);
2246
2247 if (queue->size() > 0 && verbose_low())
2248 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2249 "size = %d", i, queue->size());
2250
2251 // ...then over the contents of all the task queues.
2252 queue->oops_do(cl);
2253 }
2254
2255 // finally, invalidate any entries in the region stack that
2256 // point into the collection set
2257 if (_regionStack.invalidate_entries_into_cset()) {
2258 // otherwise, any gray objects copied during the evacuation pause
2259 // might not be visited.
2260 guarantee( _should_gray_objects, "invariant" );
2261 }
2262 }
2263
2264 void ConcurrentMark::clear_marking_state() {
2265 _markStack.setEmpty();
2266 _markStack.clear_overflow();
2267 _regionStack.setEmpty();
2268 _regionStack.clear_overflow();
2269 clear_has_overflown();
2270 _finger = _heap_start;
2271
2272 for (int i = 0; i < (int)_max_task_num; ++i) {
2273 OopTaskQueue* queue = _task_queues->queue(i);
2274 queue->set_empty();
2275 }
2276 }
2277
2278 void ConcurrentMark::print_stats() {
2279 if (verbose_stats()) {
2280 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2281 for (size_t i = 0; i < _active_tasks; ++i) {
2282 _tasks[i]->print_stats();
2283 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2284 }
2285 }
2286 }
2287
2288 class CSMarkOopClosure: public OopClosure {
2289 friend class CSMarkBitMapClosure;
2290
2291 G1CollectedHeap* _g1h;
2292 CMBitMap* _bm;
2293 ConcurrentMark* _cm;
2294 oop* _ms;
2295 jint* _array_ind_stack;
2296 int _ms_size;
2297 int _ms_ind;
2298 int _array_increment;
2299
2300 bool push(oop obj, int arr_ind = 0) {
2301 if (_ms_ind == _ms_size) {
2302 gclog_or_tty->print_cr("Mark stack is full.");
2303 return false;
2304 }
2305 _ms[_ms_ind] = obj;
2306 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2307 _ms_ind++;
2308 return true;
2309 }
2310
2311 oop pop() {
2312 if (_ms_ind == 0) return NULL;
2313 else {
2314 _ms_ind--;
2315 return _ms[_ms_ind];
2316 }
2317 }
2318
2319 bool drain() {
2320 while (_ms_ind > 0) {
2321 oop obj = pop();
2322 assert(obj != NULL, "Since index was non-zero.");
2323 if (obj->is_objArray()) {
2324 jint arr_ind = _array_ind_stack[_ms_ind];
2325 objArrayOop aobj = objArrayOop(obj);
2326 jint len = aobj->length();
2327 jint next_arr_ind = arr_ind + _array_increment;
2328 if (next_arr_ind < len) {
2329 push(obj, next_arr_ind);
2330 }
2331 // Now process this portion of this one.
2332 int lim = MIN2(next_arr_ind, len);
2333 assert(!UseCompressedOops, "This needs to be fixed");
2334 for (int j = arr_ind; j < lim; j++) {
2335 do_oop(aobj->obj_at_addr<oop>(j));
2336 }
2337
2338 } else {
2339 obj->oop_iterate(this);
2340 }
2341 if (abort()) return false;
2342 }
2343 return true;
2344 }
2345
2346 public:
2347 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2348 _g1h(G1CollectedHeap::heap()),
2349 _cm(cm),
2350 _bm(cm->nextMarkBitMap()),
2351 _ms_size(ms_size), _ms_ind(0),
2352 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2353 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2354 _array_increment(MAX2(ms_size/8, 16))
2355 {}
2356
2357 ~CSMarkOopClosure() {
2358 FREE_C_HEAP_ARRAY(oop, _ms);
2359 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2360 }
2361
2362 void do_oop(narrowOop* p) {
2363 guarantee(false, "NYI");
2364 }
2365
2366 void do_oop(oop* p) {
2367 oop obj = *p;
2368 if (obj == NULL) return;
2369 if (obj->is_forwarded()) {
2370 // If the object has already been forwarded, we have to make sure
2371 // that it's marked. So follow the forwarding pointer. Note that
2372 // this does the right thing for self-forwarding pointers in the
2373 // evacuation failure case.
2374 obj = obj->forwardee();
2375 }
2376 HeapRegion* hr = _g1h->heap_region_containing(obj);
2377 if (hr != NULL) {
2378 if (hr->in_collection_set()) {
2379 if (_g1h->is_obj_ill(obj)) {
2380 _bm->mark((HeapWord*)obj);
2381 if (!push(obj)) {
2382 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2383 set_abort();
2384 }
2385 }
2386 } else {
2387 // Outside the collection set; we need to gray it
2388 _cm->deal_with_reference(obj);
2389 }
2390 }
2391 }
2392 };
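
// CSMarkOopClosure bounds its explicit mark stack by scanning object arrays in
// strides of _array_increment elements, pushing a continuation (object, next
// index) before each stride. A standalone sketch of that chunking idea, with
// plain int arrays standing in for objArrays (illustrative names only):

#include <algorithm>
#include <cstddef>
#include <vector>

struct ArrayTask {          // a mark-stack entry: an array plus a resume index
  const int* data;
  size_t len;
  size_t from;
};

// Visits every element exactly once while keeping the stack depth bounded by
// the number of arrays rather than by their lengths.
inline long sum_in_strides(const int* data, size_t len, size_t stride) {
  long total = 0;
  std::vector<ArrayTask> stack;
  ArrayTask whole = { data, len, 0 };
  stack.push_back(whole);
  while (!stack.empty()) {
    ArrayTask t = stack.back();
    stack.pop_back();
    size_t lim = std::min(t.from + stride, t.len);
    if (lim < t.len) {
      ArrayTask rest = { t.data, t.len, lim };    // continuation for the tail
      stack.push_back(rest);
    }
    for (size_t i = t.from; i < lim; ++i) {
      total += t.data[i];                         // "scan" one element
    }
  }
  return total;
}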
2393
2394 class CSMarkBitMapClosure: public BitMapClosure {
2395 G1CollectedHeap* _g1h;
2396 CMBitMap* _bitMap;
2397 ConcurrentMark* _cm;
2398 CSMarkOopClosure _oop_cl;
2399 public:
2400 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2401 _g1h(G1CollectedHeap::heap()),
2402 _bitMap(cm->nextMarkBitMap()),
2403 _oop_cl(cm, ms_size)
2404 {}
2405
2406 ~CSMarkBitMapClosure() {}
2407
2408 bool do_bit(size_t offset) {
2409 // convert offset into a HeapWord*
2410 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2411 assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2412 "address out of range");
2413 assert(_bitMap->isMarked(addr), "tautology");
2414 oop obj = oop(addr);
2415 if (!obj->is_forwarded()) {
2416 if (!_oop_cl.push(obj)) return false;
2417 if (!_oop_cl.drain()) return false;
2418 }
2419 // Otherwise...
2420 return true;
2421 }
2422 };
2423
2424
2425 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2426 CMBitMap* _bm;
2427 CSMarkBitMapClosure _bit_cl;
2428 enum SomePrivateConstants {
2429 MSSize = 1000
2430 };
2431 bool _completed;
2432 public:
2433 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2434 _bm(cm->nextMarkBitMap()),
2435 _bit_cl(cm, MSSize),
2436 _completed(true)
2437 {}
2438
2439 ~CompleteMarkingInCSHRClosure() {}
2440
2441 bool doHeapRegion(HeapRegion* r) {
2442 if (!r->evacuation_failed()) {
2443 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2444 if (!mr.is_empty()) {
2445 if (!_bm->iterate(&_bit_cl, mr)) {
2446 _completed = false;
2447 return true;
2448 }
2449 }
2450 }
2451 return false;
2452 }
2453
2454 bool completed() { return _completed; }
2455 };
2456
2457 class ClearMarksInHRClosure: public HeapRegionClosure {
2458 CMBitMap* _bm;
2459 public:
2460 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2461
2462 bool doHeapRegion(HeapRegion* r) {
2463 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2464 MemRegion usedMR = r->used_region();
2465 _bm->clearRange(usedMR);
2466 }
2467 return false;
2468 }
2469 };
2470
2471 void ConcurrentMark::complete_marking_in_collection_set() {
2472 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2473
2474 if (!g1h->mark_in_progress()) {
2475 g1h->g1_policy()->record_mark_closure_time(0.0);
2476 return;
2477 }
2478
2479 int i = 1;
2480 double start = os::elapsedTime();
2481 while (true) {
2482 i++;
2483 CompleteMarkingInCSHRClosure cmplt(this);
2484 g1h->collection_set_iterate(&cmplt);
2485 if (cmplt.completed()) break;
2486 }
2487 double end_time = os::elapsedTime();
2488 double elapsed_time_ms = (end_time - start) * 1000.0;
2489 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2490 if (PrintGCDetails) {
2491 gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
2492 }
2493
2494 ClearMarksInHRClosure clr(nextMarkBitMap());
2495 g1h->collection_set_iterate(&clr);
2496 }
2497
2498 // The next two methods deal with the following optimisation. Some
2499 // objects are gray by being marked and located above the finger. If
2500 // they are copied, during an evacuation pause, below the finger then
2501 // they need to be pushed on the stack. The observation is that, if
2502 // there are no regions in the collection set located above the
2503 // finger, then the above cannot happen, hence we do not need to
2504 // explicitly gray any objects when copying them to below the
2505 // finger. The global stack will be scanned to ensure that, if it
2506 // points to objects being copied, it will update their
2507 // location. There is a tricky situation with the gray objects in
2508 // the region stack that are being copied, however. See the comment in
2509 // newCSet().
2510
2511 void ConcurrentMark::newCSet() {
2512 if (!concurrent_marking_in_progress())
2513 // nothing to do if marking is not in progress
2514 return;
2515
2516 // find what the lowest finger is among the global and local fingers
2517 _min_finger = _finger;
2518 for (int i = 0; i < (int)_max_task_num; ++i) {
2519 CMTask* task = _tasks[i];
2520 HeapWord* task_finger = task->finger();
2521 if (task_finger != NULL && task_finger < _min_finger)
2522 _min_finger = task_finger;
2523 }
2524
2525 _should_gray_objects = false;
2526
2527 // This fixes a very subtle and frustrating bug. It might be the case
2528 // that, during an evacuation pause, heap regions that contain
2529 // objects that are gray (by being in regions contained in the
2530 // region stack) are included in the collection set. Since such gray
2531 // objects will be moved, and because it's not easy to redirect
2532 // region stack entries to point to a new location (because objects
2533 // in one region might be scattered to multiple regions after they
2534 // are copied), one option is to ensure that all marked objects
2535 // copied during a pause are pushed on the stack. Notice, however,
2536 // that this problem can only happen when the region stack is not
2537 // empty during an evacuation pause. So, we make the fix a bit less
2538 // conservative and ensure that regions are pushed on the stack,
2539 // irrespective of whether all collection set regions are below the
2540 // finger, if the region stack is not empty. This is expected to be
2541 // a rare case, so I don't think it's necessary to be smarter about it.
2542 if (!region_stack_empty())
2543 _should_gray_objects = true;
2544 }
2545
2546 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2547 if (!concurrent_marking_in_progress())
2548 return;
2549
2550 HeapWord* region_end = hr->end();
2551 if (region_end > _min_finger)
2552 _should_gray_objects = true;
2553 }
2554
2555 void ConcurrentMark::disable_co_trackers() {
2556 if (has_aborted()) {
2557 if (_cleanup_co_tracker.enabled())
2558 _cleanup_co_tracker.disable();
2559 for (int i = 0; i < (int)_max_task_num; ++i) {
2560 CMTask* task = _tasks[i];
2561 if (task->co_tracker_enabled())
2562 task->disable_co_tracker();
2563 }
2564 } else {
2565 guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
2566 for (int i = 0; i < (int)_max_task_num; ++i) {
2567 CMTask* task = _tasks[i];
2568 guarantee( !task->co_tracker_enabled(), "invariant" );
2569 }
2570 }
2571 }
2572
2573 // abandon current marking iteration due to a Full GC
2574 void ConcurrentMark::abort() {
2575 // If we're not marking, nothing to do.
2576 if (!G1ConcMark) return;
2577
2578 // Clear all marks to force marking thread to do nothing
2579 _nextMarkBitMap->clearAll();
2580 // Empty mark stack
2581 clear_marking_state();
2582 for (int i = 0; i < (int)_max_task_num; ++i)
2583 _tasks[i]->clear_region_fields();
2584 _has_aborted = true;
2585
2586 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2587 satb_mq_set.abandon_partial_marking();
2588 satb_mq_set.set_active_all_threads(false);
2589 }
2590
2591 static void print_ms_time_info(const char* prefix, const char* name,
2592 NumberSeq& ns) {
2593 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2594 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2595 if (ns.num() > 0) {
2596 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2597 prefix, ns.sd(), ns.maximum());
2598 }
2599 }
2600
2601 void ConcurrentMark::print_summary_info() {
2602 gclog_or_tty->print_cr(" Concurrent marking:");
2603 print_ms_time_info(" ", "init marks", _init_times);
2604 print_ms_time_info(" ", "remarks", _remark_times);
2605 {
2606 print_ms_time_info(" ", "final marks", _remark_mark_times);
2607 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2608
2609 }
2610 print_ms_time_info(" ", "cleanups", _cleanup_times);
2611 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
2612 _total_counting_time,
2613 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2614 (double)_cleanup_times.num()
2615 : 0.0));
2616 if (G1ScrubRemSets) {
2617 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
2618 _total_rs_scrub_time,
2619 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2620 (double)_cleanup_times.num()
2621 : 0.0));
2622 }
2623 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
2624 (_init_times.sum() + _remark_times.sum() +
2625 _cleanup_times.sum())/1000.0);
2626 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
2627 "(%8.2f s marking, %8.2f s counting).",
2628 cmThread()->vtime_accum(),
2629 cmThread()->vtime_mark_accum(),
2630 cmThread()->vtime_count_accum());
2631 }
2632
2633 // Closures
2634 // XXX: there seems to be a lot of code duplication here;
2635 // should refactor and consolidate the shared code.
2636
2639
2640 // We take a break if someone is trying to stop the world.
2641 bool ConcurrentMark::do_yield_check(int worker_i) {
2642 if (should_yield()) {
2643 if (worker_i == 0)
2644 _g1h->g1_policy()->record_concurrent_pause();
2645 cmThread()->yield();
2646 if (worker_i == 0)
2647 _g1h->g1_policy()->record_concurrent_pause_end();
2648 return true;
2649 } else {
2650 return false;
2651 }
2652 }
2653
2654 bool ConcurrentMark::should_yield() {
2655 return cmThread()->should_yield();
2656 }
2657
2658 bool ConcurrentMark::containing_card_is_marked(void* p) {
2659 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2660 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2661 }
2662
2663 bool ConcurrentMark::containing_cards_are_marked(void* start,
2664 void* last) {
2665 return
2666 containing_card_is_marked(start) &&
2667 containing_card_is_marked(last);
2668 }
2669
2670 #ifndef PRODUCT
2671 // for debugging purposes
2672 void ConcurrentMark::print_finger() {
2673 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2674 _heap_start, _heap_end, _finger);
2675 for (int i = 0; i < (int) _max_task_num; ++i) {
2676 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
2677 }
2678 gclog_or_tty->print_cr("");
2679 }
2680 #endif
2681
2682 // Closure for iteration over bitmaps
2683 class CMBitMapClosure : public BitMapClosure {
2684 private:
2685 // the bitmap that is being iterated over
2686 CMBitMap* _nextMarkBitMap;
2687 ConcurrentMark* _cm;
2688 CMTask* _task;
2689 // true if we're scanning a heap region claimed by the task (so that
2690 // we move the finger along), false if we're not, i.e. currently when
2691 // scanning a heap region popped from the region stack (so that we
2692 // do not move the task finger along; it'd be a mistake if we did so).
2693 bool _scanning_heap_region;
2694
2695 public:
2696 CMBitMapClosure(CMTask *task,
2697 ConcurrentMark* cm,
2698 CMBitMap* nextMarkBitMap)
2699 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2700
2701 void set_scanning_heap_region(bool scanning_heap_region) {
2702 _scanning_heap_region = scanning_heap_region;
2703 }
2704
2705 bool do_bit(size_t offset) {
2706 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2707 tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
2708 tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
2709
2710 if (_scanning_heap_region) {
2711 statsOnly( _task->increase_objs_found_on_bitmap() );
2712 tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
2713 // We move the task's local finger along.
2714 _task->move_finger_to(addr);
2715 } else {
2716 // We move the task's region finger along.
2717 _task->move_region_finger_to(addr);
2718 }
2719
2720 _task->scan_object(oop(addr));
2721 // we only partially drain the local queue and global stack
2722 _task->drain_local_queue(true);
2723 _task->drain_global_stack(true);
2724
2725 // if the has_aborted flag has been raised, we need to bail out of
2726 // the iteration
2727 return !_task->has_aborted();
2728 }
2729 };
2730
2731 // Closure for iterating over objects, currently only used for
2732 // processing SATB buffers.
2733 class CMObjectClosure : public ObjectClosure {
2734 private:
2735 CMTask* _task;
2736
2737 public:
2738 void do_object(oop obj) {
2739 _task->deal_with_reference(obj);
2740 }
2741
2742 CMObjectClosure(CMTask* task) : _task(task) { }
2743 };
2744
2745 // Closure for iterating over object fields
2746 class CMOopClosure : public OopClosure {
2747 private:
2748 G1CollectedHeap* _g1h;
2749 ConcurrentMark* _cm;
2750 CMTask* _task;
2751
2752 public:
2753 void do_oop(narrowOop* p) {
2754 guarantee(false, "NYI");
2755 }
2756
2757 void do_oop(oop* p) {
2758 tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
2759
2760 oop obj = *p;
2761 if (_cm->verbose_high())
2762 gclog_or_tty->print_cr("[%d] we're looking at location "
2763 "*"PTR_FORMAT" = "PTR_FORMAT,
2764 _task->task_id(), p, (void*) obj);
2765 _task->deal_with_reference(obj);
2766 }
2767
2768 CMOopClosure(G1CollectedHeap* g1h,
2769 ConcurrentMark* cm,
2770 CMTask* task)
2771 : _g1h(g1h), _cm(cm), _task(task) { }
2772 };
2773
2774 void CMTask::setup_for_region(HeapRegion* hr) {
2775 tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
2776 "claim_region() should have filtered out continues humongous regions" );
2777
2778 if (_cm->verbose_low())
2779 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2780 _task_id, hr);
2781
2782 _curr_region = hr;
2783 _finger = hr->bottom();
2784 update_region_limit();
2785 }
2786
2787 void CMTask::update_region_limit() {
2788 HeapRegion* hr = _curr_region;
2789 HeapWord* bottom = hr->bottom();
2790 HeapWord* limit = hr->next_top_at_mark_start();
2791
2792 if (limit == bottom) {
2793 if (_cm->verbose_low())
2794 gclog_or_tty->print_cr("[%d] found an empty region "
2795 "["PTR_FORMAT", "PTR_FORMAT")",
2796 _task_id, bottom, limit);
2797 // The region was collected underneath our feet.
2798 // We set the finger to bottom to ensure that the bitmap
2799 // iteration that will follow this will not do anything.
2800 // (this is not a condition that holds when we set the region up,
2801 // as the region is not supposed to be empty in the first place)
2802 _finger = bottom;
2803 } else if (limit >= _region_limit) {
2804 tmp_guarantee_CM( limit >= _finger, "peace of mind" );
2805 } else {
2806 tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
2807 // This can happen under some pretty unusual circumstances. An
2808 // evacuation pause empties the region underneath our feet (NTAMS
2809 // at bottom). We then do some allocation in the region (NTAMS
2810 // stays at bottom), followed by the region being used as a GC
2811 // alloc region (NTAMS will move to top() and the objects
2812 // originally below it will be grayed). All objects now marked in
2813 // the region are explicitly grayed, if below the global finger,
2814 // and we do not need in fact to scan anything else. So, we simply
2815 // set _finger to be limit to ensure that the bitmap iteration
2816 // doesn't do anything.
2817 _finger = limit;
2818 }
2819
2820 _region_limit = limit;
2821 }
2822
2823 void CMTask::giveup_current_region() {
2824 tmp_guarantee_CM( _curr_region != NULL, "invariant" );
2825 if (_cm->verbose_low())
2826 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2827 _task_id, _curr_region);
2828 clear_region_fields();
2829 }
2830
2831 void CMTask::clear_region_fields() {
2832 // Set these three fields to values that indicate we're not
2833 // holding on to a region.
2834 _curr_region = NULL;
2835 _finger = NULL;
2836 _region_limit = NULL;
2837
2838 _region_finger = NULL;
2839 }
2840
2841 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2842 guarantee( nextMarkBitMap != NULL, "invariant" );
2843
2844 if (_cm->verbose_low())
2845 gclog_or_tty->print_cr("[%d] resetting", _task_id);
2846
2847 _nextMarkBitMap = nextMarkBitMap;
2848 clear_region_fields();
2849
2850 _calls = 0;
2851 _elapsed_time_ms = 0.0;
2852 _termination_time_ms = 0.0;
2853 _termination_start_time_ms = 0.0;
2854
2855 #if _MARKING_STATS_
2856 _local_pushes = 0;
2857 _local_pops = 0;
2858 _local_max_size = 0;
2859 _objs_scanned = 0;
2860 _global_pushes = 0;
2861 _global_pops = 0;
2862 _global_max_size = 0;
2863 _global_transfers_to = 0;
2864 _global_transfers_from = 0;
2865 _region_stack_pops = 0;
2866 _regions_claimed = 0;
2867 _objs_found_on_bitmap = 0;
2868 _satb_buffers_processed = 0;
2869 _steal_attempts = 0;
2870 _steals = 0;
2871 _aborted = 0;
2872 _aborted_overflow = 0;
2873 _aborted_cm_aborted = 0;
2874 _aborted_yield = 0;
2875 _aborted_timed_out = 0;
2876 _aborted_satb = 0;
2877 _aborted_termination = 0;
2878 #endif // _MARKING_STATS_
2879 }
2880
2881 bool CMTask::should_exit_termination() {
2882 regular_clock_call();
2883 // This is called when we are in the termination protocol. We should
2884 // quit if, for some reason, this task wants to abort or the global
2885 // stack is not empty (this means that we can get work from it).
2886 return !_cm->mark_stack_empty() || has_aborted();
2887 }
2888
2889 // This determines whether the method below will check both the local
2890 // and global fingers when determining whether to push on the stack a
2891 // gray object (value 1) or whether it will only check the global one
2892 // (value 0). The tradeoffs are that the former will be a bit more
2893 // accurate and possibly push less on the stack, but it might also be
2894 // a little bit slower.
2895
2896 #define _CHECK_BOTH_FINGERS_ 1
2897
2898 void CMTask::deal_with_reference(oop obj) {
2899 if (_cm->verbose_high())
2900 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
2901 _task_id, (void*) obj);
2902
2903 ++_refs_reached;
2904
2905 HeapWord* objAddr = (HeapWord*) obj;
2906 if (_g1h->is_in_g1_reserved(objAddr)) {
2907 tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2908 HeapRegion* hr = _g1h->heap_region_containing(obj);
2909 if (_g1h->is_obj_ill(obj, hr)) {
2910 if (_cm->verbose_high())
2911 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
2912 _task_id, (void*) obj);
2913
2914 // we need to mark it first
2915 if (_nextMarkBitMap->parMark(objAddr)) {
2916 // No OrderAccess::store_load() is needed. It is implicit in the
2917 // CAS done in parMark(objAddr) above
2918 HeapWord* global_finger = _cm->finger();
2919
2920 #if _CHECK_BOTH_FINGERS_
2921 // we will check both the local and global fingers
2922
2923 if (_finger != NULL && objAddr < _finger) {
2924 if (_cm->verbose_high())
2925 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
2926 "pushing it", _task_id, _finger);
2927 push(obj);
2928 } else if (_curr_region != NULL && objAddr < _region_limit) {
2929 // do nothing
2930 } else if (objAddr < global_finger) {
2931 // Notice that the global finger might be moving forward
2932 // concurrently. This is not a problem. In the worst case, we
2933 // mark the object while it is above the global finger and, by
2934 // the time we read the global finger, it has moved forward
2935 // past this object. In this case, the object will probably
2936 // be visited when a task is scanning the region and will also
2937 // be pushed on the stack. So, some duplicate work, but no
2938 // correctness problems.
2939
2940 if (_cm->verbose_high())
2941 gclog_or_tty->print_cr("[%d] below the global finger "
2942 "("PTR_FORMAT"), pushing it",
2943 _task_id, global_finger);
2944 push(obj);
2945 } else {
2946 // do nothing
2947 }
2948 #else // _CHECK_BOTH_FINGERS_
2949 // we will only check the global finger
2950
2951 if (objAddr < global_finger) {
2952 // see long comment above
2953
2954 if (_cm->verbose_high())
2955 gclog_or_tty->print_cr("[%d] below the global finger "
2956 "("PTR_FORMAT"), pushing it",
2957 _task_id, global_finger);
2958 push(obj);
2959 }
2960 #endif // _CHECK_BOTH_FINGERS_
2961 }
2962 }
2963 }
2964 }
2965
2966 void CMTask::push(oop obj) {
2967 HeapWord* objAddr = (HeapWord*) obj;
2968 tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
2969 tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
2970 tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
2971
2972 if (_cm->verbose_high())
2973 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
2974
2975 if (!_task_queue->push(obj)) {
2976 // The local task queue looks full. We need to push some entries
2977 // to the global stack.
2978
2979 if (_cm->verbose_medium())
2980 gclog_or_tty->print_cr("[%d] task queue overflow, "
2981 "moving entries to the global stack",
2982 _task_id);
2983 move_entries_to_global_stack();
2984
2985 // this should succeed since, even if we overflow the global
2986 // stack, we should have definitely removed some entries from the
2987 // local queue. So, there must be space on it.
2988 bool success = _task_queue->push(obj);
2989 tmp_guarantee_CM( success, "invariant" );
2990 }
2991
2992 statsOnly( int tmp_size = _task_queue->size();
2993 if (tmp_size > _local_max_size)
2994 _local_max_size = tmp_size;
2995 ++_local_pushes );
2996 }
2997
2998 void CMTask::reached_limit() {
2999 tmp_guarantee_CM( _words_scanned >= _words_scanned_limit ||
3000 _refs_reached >= _refs_reached_limit ,
3001 "shouldn't have been called otherwise" );
3002 regular_clock_call();
3003 }
3004
3005 void CMTask::regular_clock_call() {
3006 if (has_aborted())
3007 return;
3008
3009 // First, we need to recalculate the words scanned and refs reached
3010 // limits for the next clock call.
3011 recalculate_limits();
3012
3013 // During the regular clock call we do the following
3014
3015 // (1) If an overflow has been flagged, then we abort.
3016 if (_cm->has_overflown()) {
3017 set_has_aborted();
3018 return;
3019 }
3020
3021 // If we are not concurrent (i.e. we're doing remark) we don't need
3022 // to check anything else. The other steps are only needed during
3023 // the concurrent marking phase.
3024 if (!concurrent())
3025 return;
3026
3027 // (2) If marking has been aborted for Full GC, then we also abort.
3028 if (_cm->has_aborted()) {
3029 set_has_aborted();
3030 statsOnly( ++_aborted_cm_aborted );
3031 return;
3032 }
3033
3034 double curr_time_ms = os::elapsedVTime() * 1000.0;
3035
3036 // (3) If marking stats are enabled, then we update the step history.
3037 #if _MARKING_STATS_
3038 if (_words_scanned >= _words_scanned_limit)
3039 ++_clock_due_to_scanning;
3040 if (_refs_reached >= _refs_reached_limit)
3041 ++_clock_due_to_marking;
3042
3043 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3044 _interval_start_time_ms = curr_time_ms;
3045 _all_clock_intervals_ms.add(last_interval_ms);
3046
3047 if (_cm->verbose_medium()) {
3048 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3049 "scanned = %d%s, refs reached = %d%s",
3050 _task_id, last_interval_ms,
3051 _words_scanned,
3052 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3053 _refs_reached,
3054 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3055 }
3056 #endif // _MARKING_STATS_
3057
3058 // (4) We check whether we should yield. If we have to, then we abort.
3059 if (_cm->should_yield()) {
3060 // We should yield. To do this we abort the task. The caller is
3061 // responsible for yielding.
3062 set_has_aborted();
3063 statsOnly( ++_aborted_yield );
3064 return;
3065 }
3066
3067 // (5) We check whether we've reached our time quota. If we have,
3068 // then we abort.
3069 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3070 if (elapsed_time_ms > _time_target_ms) {
3071 set_has_aborted();
3072 _has_aborted_timed_out = true;
3073 statsOnly( ++_aborted_timed_out );
3074 return;
3075 }
3076
3077 // (6) Finally, we check whether there are enough completed SATB
3078 // buffers available for processing. If there are, we abort.
3079 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3080 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3081 if (_cm->verbose_low())
3082 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3083 _task_id);
3084 // we do need to process SATB buffers, we'll abort and restart
3085 // the marking task to do so
3086 set_has_aborted();
3087 statsOnly( ++_aborted_satb );
3088 return;
3089 }
3090 }
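
// regular_clock_call() amortises the cost of the checks above by only running
// them once every words_scanned_period / refs_reached_period units of work.
// A stripped-down sketch of that budgeting scheme; the field names and the
// period below are illustrative, not the real CMTask values.

#include <cstddef>

struct BudgetedScanner {
  size_t words_scanned;
  size_t words_limit;
  size_t words_period;
  bool   should_abort;          // would be set by the expensive checks

  BudgetedScanner(size_t period)
    : words_scanned(0), words_limit(period), words_period(period),
      should_abort(false) { }

  void regular_clock_call() {
    words_limit = words_scanned + words_period;   // recalculate_limits()
    // ...overflow / yield / time-quota / SATB checks would go here and
    // may set should_abort...
  }

  // Called from the scanning loop after each object.
  void note_words_scanned(size_t n) {
    words_scanned += n;
    if (words_scanned >= words_limit) {
      regular_clock_call();     // budget exhausted: pay for the checks now
    }
  }
};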
3091
3092 void CMTask::recalculate_limits() {
3093 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3094 _words_scanned_limit = _real_words_scanned_limit;
3095
3096 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3097 _refs_reached_limit = _real_refs_reached_limit;
3098 }
3099
3100 void CMTask::decrease_limits() {
3101 // This is called when we believe that we're going to do an infrequent
3102 // operation which will increase the per byte scanned cost (i.e. move
3103 // entries to/from the global stack). It basically tries to decrease the
3104 // scanning limit so that the clock is called earlier.
3105
3106 if (_cm->verbose_medium())
3107 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3108
3109 _words_scanned_limit = _real_words_scanned_limit -
3110 3 * words_scanned_period / 4;
3111 _refs_reached_limit = _real_refs_reached_limit -
3112 3 * refs_reached_period / 4;
3113 }
3114
3115 void CMTask::move_entries_to_global_stack() {
3116 // local array where we'll store the entries that will be popped
3117 // from the local queue
3118 oop buffer[global_stack_transfer_size];
3119
3120 int n = 0;
3121 oop obj;
3122 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3123 buffer[n] = obj;
3124 ++n;
3125 }
3126
3127 if (n > 0) {
3128 // we popped at least one entry from the local queue
3129
3130 statsOnly( ++_global_transfers_to; _local_pops += n );
3131
3132 if (!_cm->mark_stack_push(buffer, n)) {
3133 if (_cm->verbose_low())
3134 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3135 set_has_aborted();
3136 } else {
3137 // the transfer was successful
3138
3139 if (_cm->verbose_medium())
3140 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3141 _task_id, n);
3142 statsOnly( int tmp_size = _cm->mark_stack_size();
3143 if (tmp_size > _global_max_size)
3144 _global_max_size = tmp_size;
3145 _global_pushes += n );
3146 }
3147 }
3148
3149 // this operation was quite expensive, so decrease the limits
3150 decrease_limits();
3151 }
3152
3153 void CMTask::get_entries_from_global_stack() {
3154 // local array where we'll store the entries that will be popped
3155 // from the global stack.
3156 oop buffer[global_stack_transfer_size];
3157 int n;
3158 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3159 tmp_guarantee_CM( n <= global_stack_transfer_size,
3160 "we should not pop more than the given limit" );
3161 if (n > 0) {
3162 // yes, we did actually pop at least one entry
3163
3164 statsOnly( ++_global_transfers_from; _global_pops += n );
3165 if (_cm->verbose_medium())
3166 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3167 _task_id, n);
3168 for (int i = 0; i < n; ++i) {
3169 bool success = _task_queue->push(buffer[i]);
3170 // We only call this when the local queue is empty or under a
3171 // given target limit. So, we do not expect this push to fail.
3172 tmp_guarantee_CM( success, "invariant" );
3173 }
3174
3175 statsOnly( int tmp_size = _task_queue->size();
3176 if (tmp_size > _local_max_size)
3177 _local_max_size = tmp_size;
3178 _local_pushes += n );
3179 }
3180
3181 // this operation was quite expensive, so decrease the limits
3182 decrease_limits();
3183 }
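
// move_entries_to_global_stack() and get_entries_from_global_stack() amortise
// synchronisation on the shared stack by moving global_stack_transfer_size
// entries per interaction. A minimal sketch of both directions with a plain
// mutex-protected stack (the real CMMarkStack is shared differently; all names
// here are illustrative):

#include <cstddef>
#include <deque>
#include <mutex>
#include <vector>

const size_t transfer_size = 16;

struct SharedStack {
  std::mutex lock;
  std::vector<int> entries;
};

// Spill up to one chunk from the private queue; one lock acquisition per chunk.
inline void spill(std::deque<int>& local, SharedStack& global) {
  int buffer[transfer_size];
  size_t n = 0;
  while (n < transfer_size && !local.empty()) {
    buffer[n++] = local.back();
    local.pop_back();
  }
  if (n > 0) {
    std::lock_guard<std::mutex> g(global.lock);
    global.entries.insert(global.entries.end(), buffer, buffer + n);
  }
}

// Refill the private queue with up to one chunk from the shared stack.
inline void refill(std::deque<int>& local, SharedStack& global) {
  std::lock_guard<std::mutex> g(global.lock);
  size_t n = global.entries.size() < transfer_size ? global.entries.size()
                                                   : transfer_size;
  for (size_t i = 0; i < n; ++i) {
    local.push_back(global.entries.back());
    global.entries.pop_back();
  }
}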
3184
3185 void CMTask::drain_local_queue(bool partially) {
3186 if (has_aborted())
3187 return;
3188
3189 // Decide what the target size is, depending on whether we're going to
3190 // drain it partially (so that other tasks can steal if they run out
3191 // of things to do) or totally (at the very end).
3192 size_t target_size;
3193 if (partially)
3194 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3195 else
3196 target_size = 0;
3197
3198 if (_task_queue->size() > target_size) {
3199 if (_cm->verbose_high())
3200 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3201 _task_id, target_size);
3202
3203 oop obj;
3204 bool ret = _task_queue->pop_local(obj);
3205 while (ret) {
3206 statsOnly( ++_local_pops );
3207
3208 if (_cm->verbose_high())
3209 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3210 (void*) obj);
3211
3212 tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
3213 "invariant" );
3214
3215 scan_object(obj);
3216
3217 if (_task_queue->size() <= target_size || has_aborted())
3218 ret = false;
3219 else
3220 ret = _task_queue->pop_local(obj);
3221 }
3222
3223 if (_cm->verbose_high())
3224 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3225 _task_id, _task_queue->size());
3226 }
3227 }
3228
3229 void CMTask::drain_global_stack(bool partially) {
3230 if (has_aborted())
3231 return;
3232
3233 // We have a policy to drain the local queue before we attempt to
3234 // drain the global stack.
3235 tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" );
3236
3237 // Decide what the target size is, depending on whether we're going to
3238 // drain it partially (so that other tasks can steal if they run out
3239 // of things to do) or totally (at the very end). Notice that,
3240 // because we move entries from the global stack in chunks or
3241 // because another task might be doing the same, we might in fact
3242 // drop below the target. But, this is not a problem.
3243 size_t target_size;
3244 if (partially)
3245 target_size = _cm->partial_mark_stack_size_target();
3246 else
3247 target_size = 0;
3248
3249 if (_cm->mark_stack_size() > target_size) {
3250 if (_cm->verbose_low())
3251 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3252 _task_id, target_size);
3253
3254 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3255 get_entries_from_global_stack();
3256 drain_local_queue(partially);
3257 }
3258
3259 if (_cm->verbose_low())
3260 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3261 _task_id, _cm->mark_stack_size());
3262 }
3263 }
3264
3265 // SATB Queue has several assumptions on whether to call the par or
3266 // non-par versions of the methods. This is why some of the code is
3267 // replicated. We should really get rid of the single-threaded version
3268 // of the code to simplify things.
3269 void CMTask::drain_satb_buffers() {
3270 if (has_aborted())
3271 return;
3272
3273 // We set this so that the regular clock knows that we're in the
3274 // middle of draining buffers and doesn't set the abort flag when it
3275 // notices that SATB buffers are available for draining. It'd be
3276 // very counterproductive if it did that. :-)
3277 _draining_satb_buffers = true;
3278
3279 CMObjectClosure oc(this);
3280 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3281 if (ParallelGCThreads > 0)
3282 satb_mq_set.set_par_closure(_task_id, &oc);
3283 else
3284 satb_mq_set.set_closure(&oc);
3285
3286 // This keeps claiming and applying the closure to completed buffers
3287 // until we run out of buffers or we need to abort.
3288 if (ParallelGCThreads > 0) {
3289 while (!has_aborted() &&
3290 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3291 if (_cm->verbose_medium())
3292 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3293 statsOnly( ++_satb_buffers_processed );
3294 regular_clock_call();
3295 }
3296 } else {
3297 while (!has_aborted() &&
3298 satb_mq_set.apply_closure_to_completed_buffer()) {
3299 if (_cm->verbose_medium())
3300 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3301 statsOnly( ++_satb_buffers_processed );
3302 regular_clock_call();
3303 }
3304 }
3305
3306 if (!concurrent() && !has_aborted()) {
3307 // We should only do this during remark.
3308 if (ParallelGCThreads > 0)
3309 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3310 else
3311 satb_mq_set.iterate_closure_all_threads();
3312 }
3313
3314 _draining_satb_buffers = false;
3315
3316 tmp_guarantee_CM( has_aborted() ||
3317 concurrent() ||
3318 satb_mq_set.completed_buffers_num() == 0, "invariant" );
3319
3320 if (ParallelGCThreads > 0)
3321 satb_mq_set.set_par_closure(_task_id, NULL);
3322 else
3323 satb_mq_set.set_closure(NULL);
3324
3325 // again, this was a potentially expensive operation, decrease the
3326 // limits to get the regular clock call early
3327 decrease_limits();
3328 }
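
// A stripped-down model of the SATB buffer flow that drain_satb_buffers()
// consumes: each mutator thread logs pre-write values into a private buffer,
// full buffers move onto a shared "completed" list, and marking tasks claim
// completed buffers one at a time and apply a closure to every entry. All
// names here are illustrative, not the real SATBMarkQueueSet API.

#include <cstddef>
#include <mutex>
#include <utility>
#include <vector>

struct SATBModel {
  static const size_t buffer_size = 256;
  std::mutex lock;
  std::vector<std::vector<void*> > completed;  // full buffers awaiting marking

  // Called from the (modelled) write barrier with the thread's own buffer.
  void enqueue(std::vector<void*>& local_buf, void* old_value) {
    local_buf.push_back(old_value);
    if (local_buf.size() == buffer_size) {
      std::lock_guard<std::mutex> g(lock);
      completed.push_back(std::move(local_buf));
      local_buf.clear();                       // valid-but-empty after the move
    }
  }

  // Called by a marking task; returns false once no completed buffer is left,
  // mirroring apply_closure_to_completed_buffer() above.
  template <typename Closure>
  bool apply_closure_to_completed_buffer(Closure& cl) {
    std::vector<void*> buf;
    {
      std::lock_guard<std::mutex> g(lock);
      if (completed.empty()) return false;
      buf = std::move(completed.back());
      completed.pop_back();
    }
    for (size_t i = 0; i < buf.size(); ++i) cl(buf[i]);
    return true;
  }
};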
3329
3330 void CMTask::drain_region_stack(BitMapClosure* bc) {
3331 if (has_aborted())
3332 return;
3333
3334 tmp_guarantee_CM( _region_finger == NULL,
3335 "it should be NULL when we're not scanning a region" );
3336
3337 if (!_cm->region_stack_empty()) {
3338 if (_cm->verbose_low())
3339 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3340 _task_id, _cm->region_stack_size());
3341
3342 MemRegion mr = _cm->region_stack_pop();
3343 // it returns MemRegion() if the pop fails
3344 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3345
3346 while (mr.start() != NULL) {
3347 if (_cm->verbose_medium())
3348 gclog_or_tty->print_cr("[%d] we are scanning region "
3349 "["PTR_FORMAT", "PTR_FORMAT")",
3350 _task_id, mr.start(), mr.end());
3351 tmp_guarantee_CM( mr.end() <= _cm->finger(),
3352 "otherwise the region shouldn't be on the stack" );
3353 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3354 if (_nextMarkBitMap->iterate(bc, mr)) {
3355 tmp_guarantee_CM( !has_aborted(),
3356 "cannot abort the task without aborting the bitmap iteration" );
3357
3358 // We finished iterating over the region without aborting.
3359 regular_clock_call();
3360 if (has_aborted())
3361 mr = MemRegion();
3362 else {
3363 mr = _cm->region_stack_pop();
3364 // it returns MemRegion() if the pop fails
3365 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3366 }
3367 } else {
3368 guarantee( has_aborted(), "currently the only way to do so" );
3369
3370 // The only way to abort the bitmap iteration is to return
3371 // false from the do_bit() method. However, inside the
3372 // do_bit() method we move the _region_finger to point to the
3373 // object currently being looked at. So, if we bail out, we
3374 // have definitely set _region_finger to something non-null.
3375 guarantee( _region_finger != NULL, "invariant" );
3376
3377 // The iteration was actually aborted. So now _region_finger
3378 // points to the address of the object we last scanned. If we
3379 // leave it there, when we restart this task, we will rescan
3380 // the object. It is easy to avoid this. We move the finger by
3381 // enough to point to the next possible object header (the
3382 // bitmap knows by how much we need to move it as it knows its
3383 // granularity).
3384 MemRegion newRegion =
3385 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3386
3387 if (!newRegion.is_empty()) {
3388 if (_cm->verbose_low()) {
3389 gclog_or_tty->print_cr("[%d] pushing unscanned region "
3390 "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
3391 _task_id,
3392 newRegion.start(), newRegion.end());
3393 }
3394 // Now push the part of the region we didn't scan on the
3395 // region stack to make sure a task scans it later.
3396 _cm->region_stack_push(newRegion);
3397 }
3398 // break from while
3399 mr = MemRegion();
3400 }
3401 _region_finger = NULL;
3402 }
3403
3404 // We only push regions on the region stack during evacuation
3405 // pauses. So if we come out of the above iteration because the
3406 // region stack is empty, it will remain empty until the next
3407 // yield point. So, the guarantee below is safe.
3408 guarantee( has_aborted() || _cm->region_stack_empty(),
3409 "only way to exit the loop" );
3410
3411 if (_cm->verbose_low())
3412 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3413 _task_id, _cm->region_stack_size());
3414 }
3415 }
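// The interesting detail above is how an aborted bitmap iteration avoids
// rescanning the last object: the region finger is bumped to the next
// possible object header before the unscanned remainder is pushed back on
// the region stack. The stand-alone sketch below shows just that
// arithmetic; TinyBitMap, nextWord() and the one-bit-per-word granularity
// are assumptions of the sketch, not the CMBitMap interface.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Hypothetical, simplified view of a mark bitmap's granularity.
struct TinyBitMap {
  // Granularity assumed for this sketch: one bit covers one heap word.
  static const size_t words_per_bit = 1;
  // First address that could hold an object header strictly after 'addr'.
  const uintptr_t* nextWord(const uintptr_t* addr) const {
    return addr + words_per_bit;
  }
};

int main() {
  uintptr_t heap[16] = {0};
  TinyBitMap bm;

  // Pretend an aborted iteration left the region finger on the object at
  // heap[5]; the part that still needs scanning starts one word later, so
  // restarting will not revisit the object the finger points at.
  const uintptr_t* region_end    = heap + 16;
  const uintptr_t* region_finger = heap + 5;
  const uintptr_t* restart_from  = bm.nextWord(region_finger);

  assert(restart_from > region_finger && restart_from <= region_end);
  std::printf("push unscanned remainder [%p, %p) back on the region stack\n",
              (const void*) restart_from, (const void*) region_end);
  return 0;
}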
3416
3417 void CMTask::print_stats() {
3418 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3419 _task_id, _calls);
3420 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3421 _elapsed_time_ms, _termination_time_ms);
3422 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3423 _step_times_ms.num(), _step_times_ms.avg(),
3424 _step_times_ms.sd());
3425 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3426 _step_times_ms.maximum(), _step_times_ms.sum());
3427
3428 #if _MARKING_STATS_
3429 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3430 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3431 _all_clock_intervals_ms.sd());
3432 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3433 _all_clock_intervals_ms.maximum(),
3434 _all_clock_intervals_ms.sum());
3435 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3436 _clock_due_to_scanning, _clock_due_to_marking);
3437 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3438 _objs_scanned, _objs_found_on_bitmap);
3439 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3440 _local_pushes, _local_pops, _local_max_size);
3441 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3442 _global_pushes, _global_pops, _global_max_size);
3443 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3444 _global_transfers_to,_global_transfers_from);
3445 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3446 _regions_claimed, _region_stack_pops);
3447 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3448 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3449 _steal_attempts, _steals);
3450 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3451 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3452 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3453 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3454 _aborted_timed_out, _aborted_satb, _aborted_termination);
3455 #endif // _MARKING_STATS_
3456 }
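// The cumulative figures printed above (count, average, standard
// deviation, maximum, total) can be kept with a small running
// accumulator. The stand-alone sketch below uses a generic
// sum/sum-of-squares scheme with a population-variance formula; the class
// and method names are assumptions of the sketch, not a reimplementation
// of the sequence class behind _step_times_ms.

#include <cmath>
#include <cstdio>
#include <initializer_list>

// Minimal running-statistics accumulator, for illustration only.
class RunningStats {
  int    _num  = 0;
  double _sum  = 0.0;
  double _sum2 = 0.0;   // sum of squares, used for the standard deviation
  double _max  = 0.0;
 public:
  void add(double v) {
    ++_num;
    _sum  += v;
    _sum2 += v * v;
    if (_num == 1 || v > _max) _max = v;
  }
  int    num()     const { return _num; }
  double sum()     const { return _sum; }
  double maximum() const { return _max; }
  double avg()     const { return _num > 0 ? _sum / _num : 0.0; }
  double sd()      const {
    if (_num < 2) return 0.0;
    double mean = avg();
    double var  = _sum2 / _num - mean * mean;   // population variance
    return var > 0.0 ? std::sqrt(var) : 0.0;
  }
};

int main() {
  RunningStats step_times_ms;
  for (double t : {9.7, 10.2, 10.1, 9.9}) step_times_ms.add(t);
  std::printf("num = %d, avg = %1.2lfms, sd = %1.2lfms, "
              "max = %1.2lfms, total = %1.2lfms\n",
              step_times_ms.num(), step_times_ms.avg(), step_times_ms.sd(),
              step_times_ms.maximum(), step_times_ms.sum());
  return 0;
}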
3457
3458 /*****************************************************************************
3459
3460 The do_marking_step(time_target_ms) method is the building block
3461 of the parallel marking framework. It can be called in parallel
3462 with other invocations of do_marking_step() on different tasks
3463 (but only one per task, obviously) and concurrently with the
3464 mutator threads, or during remark, hence it eliminates the need
3465 for two versions of the code. When called during remark, it will
3466 pick up from where the task left off during the concurrent marking
3467 phase. Interestingly, tasks are also claimable during evacuation
3468 pauses, since do_marking_step() ensures that it aborts before
3469 it needs to yield.
3470
3471 The data structures that it uses to do marking work are the
3472 following:
3473
3474 (1) Marking Bitmap. If there are gray objects that appear only
3475 on the bitmap (this happens either when dealing with an overflow
3476 or when the initial marking phase has simply marked the roots
3477 and didn't push them on the stack), then tasks claim heap
3478 regions whose bitmap they then scan to find gray objects. A
3479 global finger indicates where the end of the last claimed region
3480 is. A local finger indicates how far into the region a task has
3481 scanned. The two fingers are used to determine how to gray an
3482 object (i.e. whether simply marking it is OK, as it will be
3483 visited by a task in the future, or whether it also needs to be
3484 pushed on a stack).
3485
3486 (2) Local Queue. Each task has a local queue, which it can
3487 access reasonably efficiently. Other tasks can steal from
3488 it when they run out of work. Throughout the marking phase, a
3489 task attempts to keep its local queue short but not totally
3490 empty, so that entries are available for stealing by other
3491 tasks. Only when there is no more work will a task totally
3492 drain its local queue.
3493
3494 (3) Global Mark Stack. This handles local queue overflow. During
3495 marking, only sets of entries are moved between it and the local
3496 queues, as access to it requires a mutex and more fine-grained
3497 interaction with it might cause contention. If it
3498 overflows, then the marking phase should restart and iterate
3499 over the bitmap to identify gray objects. Throughout the marking
3500 phase, tasks attempt to keep the global mark stack at a small
3501 length but not totally empty, so that entries are available for
3502 popping by other tasks. Only when there is no more work will
3503 tasks totally drain the global mark stack.
3504
3505 (4) Global Region Stack. Entries on it correspond to areas of
3506 the bitmap that need to be scanned since they contain gray
3507 objects. Pushes on the region stack only happen during
3508 evacuation pauses and typically correspond to areas covered by
3509 GC LABs. If it overflows, then the marking phase should restart
3510 and iterate over the bitmap to identify gray objects. Tasks will
3511 try to totally drain the region stack as soon as possible.
3512
3513 (5) SATB Buffer Queue. This is where completed SATB buffers are
3514 made available. Buffers are regularly removed from this queue
3515 and scanned for roots, so that the queue doesn't get too
3516 long. During remark, all completed buffers are processed, as
3517 well as the filled-in parts of any uncompleted buffers.
3518
3519 The do_marking_step() method tries to abort when the time target
3520 has been reached. There are a few other cases when the
3521 do_marking_step() method also aborts:
3522
3523 (1) When the marking phase has been aborted (after a Full GC).
3524
3525 (2) When a global overflow (either on the global stack or the
3526 region stack) has been triggered. Before the task aborts, it
3527 will actually sync up with the other tasks to ensure that all
3528 the marking data structures (local queues, stacks, fingers etc.)
3529 are re-initialised so that when do_marking_step() completes,
3530 the marking phase can immediately restart.
3531
3532 (3) When enough completed SATB buffers are available. The
3533 do_marking_step() method only tries to drain SATB buffers right
3534 at the beginning. So, if enough buffers are available, the
3535 marking step aborts and the SATB buffers are processed at
3536 the beginning of the next invocation.
3537
3538 (4) To yield. When we have to yield, we abort and yield
3539 right at the end of do_marking_step(). This saves us from a lot
3540 of hassle as, by yielding, we might allow a Full GC. If this
3541 happens then objects will be compacted underneath our feet, the
3542 heap might shrink, etc. We save checking for this by just
3543 aborting and doing the yield right at the end.
3544
3545 From the above it follows that the do_marking_step() method should
3546 be called in a loop (or, otherwise, regularly) until it completes.
3547
3548 If a marking step completes without its has_aborted() flag being
3549 true, it means it has completed the current marking phase (and
3550 also all other marking tasks have done so and have all synced up).
3551
3552 A method called regular_clock_call() is invoked "regularly" (at
3553 sub-ms intervals) throughout marking. It is this clock method that
3554 checks all the abort conditions which were mentioned above and
3555 decides when the task should abort. A work-based scheme is used to
3556 trigger this clock method: it is called when the number of object
3557 words the marking phase has scanned or the number of references
3558 the marking phase has visited reaches a given limit. Additional
3559 invocations of the clock method have been planted in a few other
3560 strategic places too. The initial reason for the clock method was
3561 to avoid reading the virtual time too regularly, as it is quite
3562 expensive. So, once it was in place, it was natural to piggy-back
3563 all the other conditions on it too and not constantly check them
3564 throughout the code. (A sketch of this scheme follows this block.)
3565 *****************************************************************************/
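// As promised at the end of the comment block, here is a stand-alone
// sketch of the work-based clock trigger: counters of scanned words and
// reached references grow as work is done, and once either crosses its
// limit the clock routine runs and may set the abort flag. The limits,
// the note_* entry points and the fake time source are assumptions of the
// sketch; the real task also rebuilds and shrinks its limits through
// recalculate_limits() and decrease_limits().

#include <cstddef>
#include <cstdio>

// Hypothetical task skeleton showing only the work-based clock trigger.
class SketchTask {
  size_t _words_scanned       = 0;
  size_t _refs_reached        = 0;
  size_t _words_scanned_limit = 4096;   // arbitrary sketch limits
  size_t _refs_reached_limit  = 1024;
  double _elapsed_ms          = 0.0;
  double _time_target_ms      = 10.0;
  bool   _has_aborted         = false;

  void regular_clock_call() {
    // The real clock also checks for pending SATB buffers, yield
    // requests, a marking abort, etc.; the sketch only checks the target.
    if (_elapsed_ms > _time_target_ms) _has_aborted = true;
    // Start a fresh work interval.
    _words_scanned = 0;
    _refs_reached  = 0;
  }

  void check_limits() {
    if (_words_scanned >= _words_scanned_limit ||
        _refs_reached  >= _refs_reached_limit) {
      regular_clock_call();
    }
  }

 public:
  // Called once per object scanned, with the object size in words.
  void note_object_scanned(size_t size_in_words) {
    _words_scanned += size_in_words;
    check_limits();
  }
  // Called once per reference visited while scanning an object.
  void note_ref_reached() {
    ++_refs_reached;
    check_limits();
  }
  void tick_ms(double ms)  { _elapsed_ms += ms; }
  bool has_aborted() const { return _has_aborted; }
};

int main() {
  SketchTask task;
  // Simulate marking work until the (sketch) time target trips the abort.
  while (!task.has_aborted()) {
    task.note_object_scanned(8);
    task.note_ref_reached();
    task.tick_ms(0.01);
  }
  std::printf("clock noticed the expired time target and aborted the step\n");
  return 0;
}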
3566
3567 void CMTask::do_marking_step(double time_target_ms) {
3568 guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
3569 guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
3570
3571 guarantee( concurrent() || _cm->region_stack_empty(),
3572 "the region stack should have been cleared before remark" );
3573 guarantee( _region_finger == NULL,
3574 "this should be non-null only when a region is being scanned" );
3575
3576 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3577 guarantee( _task_queues != NULL, "invariant" );
3578 guarantee( _task_queue != NULL, "invariant" );
3579 guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
3580
3581 guarantee( !_claimed,
3582 "only one thread should claim this task at any one time" );
3583
3584 // OK, this doesn't safeguard against all possible scenarios, as it is
3585 // possible for two threads to set the _claimed flag at the same
3586 // time. But it is only for debugging purposes anyway and it will
3587 // catch most problems.
3588 _claimed = true;
3589
3590 _start_time_ms = os::elapsedVTime() * 1000.0;
3591 statsOnly( _interval_start_time_ms = _start_time_ms );
3592
3593 double diff_prediction_ms =
3594 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3595 _time_target_ms = time_target_ms - diff_prediction_ms;
3596
3597 // set up the variables that are used in the work-based scheme to
3598 // call the regular clock method
3599 _words_scanned = 0;
3600 _refs_reached = 0;
3601 recalculate_limits();
3602
3603 // clear all flags
3604 clear_has_aborted();
3605 _has_aborted_timed_out = false;
3606 _draining_satb_buffers = false;
3607
3608 ++_calls;
3609
3610 if (_cm->verbose_low())
3611 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3612 "target = %1.2lfms >>>>>>>>>>",
3613 _task_id, _calls, _time_target_ms);
3614
3615 // Set up the bitmap and oop closures. Anything that uses them is
3616 // eventually called from this method, so it is OK to allocate these
3617 // statically.
3618 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3619 CMOopClosure oop_closure(_g1h, _cm, this);
3620 set_oop_closure(&oop_closure);
3621
3622 if (_cm->has_overflown()) {
3623 // This can happen if the region stack or the mark stack overflows
3624 // during a GC pause and this task, after a yield point,
3625 // restarts. We have to abort as we need to get into the overflow
3626 // protocol which happens right at the end of this task.
3627 set_has_aborted();
3628 }
3629
3630 // First drain any available SATB buffers. After this, we will not
3631 // look at SATB buffers before the next invocation of this method.
3632 // If enough completed SATB buffers are queued up, the regular clock
3633 // will abort this task so that it restarts.
3634 drain_satb_buffers();
3635 // ...then partially drain the local queue and the global stack
3636 drain_local_queue(true);
3637 drain_global_stack(true);
3638
3639 // Then totally drain the region stack. We will not look at
3640 // it again before the next invocation of this method. Entries on
3641 // the region stack are only added during evacuation pauses, for
3642 // which we have to yield. When we do, we abort the task anyway so
3643 // it will look at the region stack again when it restarts.
3644 bitmap_closure.set_scanning_heap_region(false);
3645 drain_region_stack(&bitmap_closure);
3646 // ...then partially drain the local queue and the global stack
3647 drain_local_queue(true);
3648 drain_global_stack(true);
3649
3650 do {
3651 if (!has_aborted() && _curr_region != NULL) {
3652 // This means that we're already holding on to a region.
3653 tmp_guarantee_CM( _finger != NULL,
3654 "if region is not NULL, then the finger "
3655 "should not be NULL either" );
3656
3657 // We might have restarted this task after an evacuation pause
3658 // which might have evacuated the region we're holding on to
3659 // underneath our feet. Let's read its limit again to make sure
3660 // that we do not iterate over a region of the heap that
3661 // contains garbage (update_region_limit() will also move
3662 // _finger to the start of the region if it is found empty).
3663 update_region_limit();
3664 // We will start from _finger not from the start of the region,
3665 // as we might be restarting this task after aborting half-way
3666 // through scanning this region. In this case, _finger points to
3667 // the address where we last found a marked object. If this is a
3668 // fresh region, _finger points to start().
3669 MemRegion mr = MemRegion(_finger, _region_limit);
3670
3671 if (_cm->verbose_low())
3672 gclog_or_tty->print_cr("[%d] we're scanning part "
3673 "["PTR_FORMAT", "PTR_FORMAT") "
3674 "of region "PTR_FORMAT,
3675 _task_id, _finger, _region_limit, _curr_region);
3676
3677 // Let's iterate over the bitmap of the part of the
3678 // region that is left.
3679 bitmap_closure.set_scanning_heap_region(true);
3680 if (mr.is_empty() ||
3681 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3682 // We successfully completed iterating over the region. Now,
3683 // let's give up the region.
3684 giveup_current_region();
3685 regular_clock_call();
3686 } else {
3687 guarantee( has_aborted(), "currently the only way to do so" );
3688 // The only way to abort the bitmap iteration is to return
3689 // false from the do_bit() method. However, inside the
3690 // do_bit() method we move the _finger to point to the
3691 // object currently being looked at. So, if we bail out, we
3692 // have definitely set _finger to something non-null.
3693 guarantee( _finger != NULL, "invariant" );
3694
3695 // Region iteration was actually aborted. So now _finger
3696 // points to the address of the object we last scanned. If we
3697 // leave it there, when we restart this task, we will rescan
3698 // the object. It is easy to avoid this. We move the finger by
3699 // enough to point to the next possible object header (the
3700 // bitmap knows by how much we need to move it as it knows its
3701 // granularity).
3702 move_finger_to(_nextMarkBitMap->nextWord(_finger));
3703 }
3704 }
3705 // At this point we have either completed iterating over the
3706 // region we were holding on to, or we have aborted.
3707
3708 // We then partially drain the local queue and the global stack.
3709 // (Do we really need this?)
3710 drain_local_queue(true);
3711 drain_global_stack(true);
3712
3713 // Read the note on the claim_region() method on why it might
3714 // return NULL with potentially more regions available for
3715 // claiming and why we have to check out_of_regions() to determine
3716 // whether we're done or not.
3717 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3718 // We are going to try to claim a new region. We should have
3719 // given up on the previous one.
3720 tmp_guarantee_CM( _curr_region == NULL &&
3721 _finger == NULL &&
3722 _region_limit == NULL, "invariant" );
3723 if (_cm->verbose_low())
3724 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3725 HeapRegion* claimed_region = _cm->claim_region(_task_id);
3726 if (claimed_region != NULL) {
3727 // Yes, we managed to claim one
3728 statsOnly( ++_regions_claimed );
3729
3730 if (_cm->verbose_low())
3731 gclog_or_tty->print_cr("[%d] we successfully claimed "
3732 "region "PTR_FORMAT,
3733 _task_id, claimed_region);
3734
3735 setup_for_region(claimed_region);
3736 tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
3737 }
3738 // It is important to call the regular clock here. It might take
3739 // a while to claim a region if, for example, we hit a large
3740 // block of empty regions. So we need to call the regular clock
3741 // method once round the loop to make sure it's called
3742 // frequently enough.
3743 regular_clock_call();
3744 }
3745
3746 if (!has_aborted() && _curr_region == NULL) {
3747 tmp_guarantee_CM( _cm->out_of_regions(),
3748 "at this point we should be out of regions" );
3749 }
3750 } while ( _curr_region != NULL && !has_aborted());
3751
3752 if (!has_aborted()) {
3753 // We cannot check whether the global stack is empty, since other
3754 // tasks might be pushing objects to it concurrently.
3755 tmp_guarantee_CM( _cm->out_of_regions() && _cm->region_stack_empty(),
3756 "at this point we should be out of regions" );
3757
3758 if (_cm->verbose_low())
3759 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3760
3761 // Try to reduce the number of available SATB buffers so that
3762 // remark has less work to do.
3763 drain_satb_buffers();
3764 }
3765
3766 // Since we've done everything else, we can now totally drain the
3767 // local queue and global stack.
3768 drain_local_queue(false);
3769 drain_global_stack(false);
3770
3771 // Attempt at work stealing from other tasks' queues.
3772 if (!has_aborted()) {
3773 // We have not aborted. This means that we have finished all that
3774 // we could. Let's try to do some stealing...
3775
3776 // We cannot check whether the global stack is empty, since other
3777 // tasks might be pushing objects to it concurrently.
3778 guarantee( _cm->out_of_regions() &&
3779 _cm->region_stack_empty() &&
3780 _task_queue->size() == 0, "only way to reach here" );
3781
3782 if (_cm->verbose_low())
3783 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3784
3785 while (!has_aborted()) {
3786 oop obj;
3787 statsOnly( ++_steal_attempts );
3788
3789 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3790 if (_cm->verbose_medium())
3791 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3792 _task_id, (void*) obj);
3793
3794 statsOnly( ++_steals );
3795
3796 tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
3797 "any stolen object should be marked" );
3798 scan_object(obj);
3799
3800 // And since we're towards the end, let's totally drain the
3801 // local queue and global stack.
3802 drain_local_queue(false);
3803 drain_global_stack(false);
3804 } else {
3805 break;
3806 }
3807 }
3808 }
3809
3810 // We still haven't aborted. Now, let's try to get into the
3811 // termination protocol.
3812 if (!has_aborted()) {
3813 // We cannot check whether the global stack is empty, since other
3814 // tasks might be concurrently pushing objects on it.
3815 guarantee( _cm->out_of_regions() &&
3816 _cm->region_stack_empty() &&
3817 _task_queue->size() == 0, "only way to reach here" );
3818
3819 if (_cm->verbose_low())
3820 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3821
3822 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3823 // The CMTask class also extends the TerminatorTerminator class,
3824 // hence its should_exit_termination() method will also decide
3825 // whether to exit the termination protocol or not.
3826 bool finished = _cm->terminator()->offer_termination(this);
3827 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3828 _termination_time_ms +=
3829 termination_end_time_ms - _termination_start_time_ms;
3830
3831 if (finished) {
3832 // We're all done.
3833
3834 if (_task_id == 0) {
3835 // let's allow task 0 to do this
3836 if (concurrent()) {
3837 guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
3838 // we need to set this to false before the next
3839 // safepoint. This way we ensure that the marking phase
3840 // doesn't observe any more heap expansions.
3841 _cm->clear_concurrent_marking_in_progress();
3842 }
3843 }
3844
3845 // We can now guarantee that the global stack is empty, since
3846 // all other tasks have finished.
3847 guarantee( _cm->out_of_regions() &&
3848 _cm->region_stack_empty() &&
3849 _cm->mark_stack_empty() &&
3850 _task_queue->size() == 0 &&
3851 !_cm->has_overflown() &&
3852 !_cm->mark_stack_overflow() &&
3853 !_cm->region_stack_overflow(),
3854 "only way to reach here" );
3855
3856 if (_cm->verbose_low())
3857 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
3858 } else {
3859 // Apparently there's more work to do. Let's abort this task. Its
3860 // caller will restart it and we can hopefully find more things to do.
3861
3862 if (_cm->verbose_low())
3863 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
3864
3865 set_has_aborted();
3866 statsOnly( ++_aborted_termination );
3867 }
3868 }
3869
3870 // Mainly for debugging purposes to make sure that a pointer to the
3871 // closure which was statically allocated in this frame doesn't
3872 // escape it by accident.
3873 set_oop_closure(NULL);
3874 double end_time_ms = os::elapsedVTime() * 1000.0;
3875 double elapsed_time_ms = end_time_ms - _start_time_ms;
3876 // Update the step history.
3877 _step_times_ms.add(elapsed_time_ms);
3878
3879 if (has_aborted()) {
3880 // The task was aborted for some reason.
3881
3882 statsOnly( ++_aborted );
3883
3884 if (_has_aborted_timed_out) {
3885 double diff_ms = elapsed_time_ms - _time_target_ms;
3886 // Keep statistics of how well we did with respect to hitting
3887 // our target only if we actually timed out (if we aborted for
3888 // other reasons, then the results might get skewed).
3889 _marking_step_diffs_ms.add(diff_ms);
3890 }
3891
3892 if (_cm->has_overflown()) {
3893 // This is the interesting one. We aborted because a global
3894 // overflow was raised. This means we have to restart the
3895 // marking phase and start iterating over regions. However, in
3896 // order to do this we have to make sure that all tasks stop
3897 // what they are doing and re-initialise in a safe manner. We
3898 // will achieve this with the use of two barrier sync points.
3899
3900 if (_cm->verbose_low())
3901 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
3902
3903 _cm->enter_first_sync_barrier(_task_id);
3904 // When we exit this sync barrier we know that all tasks have
3905 // stopped doing marking work. So, it's now safe to
3906 // re-initialise our data structures. At the end of this method,
3907 // task 0 will clear the global data structures.
3908
3909 statsOnly( ++_aborted_overflow );
3910
3911 // We clear the local state of this task...
3912 clear_region_fields();
3913
3914 // ...and enter the second barrier.
3915 _cm->enter_second_sync_barrier(_task_id);
3916 // At this point everything has been re-initialised and we're
3917 // ready to restart.
3918 }
3919
3920 if (_cm->verbose_low()) {
3921 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
3922 "elapsed = %1.2lfms <<<<<<<<<<",
3923 _task_id, _time_target_ms, elapsed_time_ms);
3924 if (_cm->has_aborted())
3925 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
3926 _task_id);
3927 }
3928 } else {
3929 if (_cm->verbose_low())
3930 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
3931 "elapsed = %1.2lfms <<<<<<<<<<",
3932 _task_id, _time_target_ms, elapsed_time_ms);
3933 }
3934
3935 _claimed = false;
3936 }
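// The comment block before do_marking_step() says the method should be
// called in a loop until it completes without aborting. The stand-alone
// driver below illustrates only that contract; Step and its made-up
// three-slice workload are placeholders, not the actual concurrent
// marking or remark driver code.

#include <cstdio>

// Hypothetical stand-in for a CMTask-like object: each call either
// finishes or "aborts" because its time slice ran out.
struct Step {
  int  remaining = 3;      // pretend three time slices of work remain
  bool aborted   = false;
  void do_marking_step(double /* time_target_ms */) {
    aborted = (--remaining > 0);
  }
  bool has_aborted() const { return aborted; }
};

int main() {
  Step task;
  int calls = 0;
  do {
    task.do_marking_step(10.0 /* ms target, arbitrary for the sketch */);
    ++calls;
    // A real driver would yield for safepoints between iterations here.
  } while (task.has_aborted());
  std::printf("marking step completed after %d call(s)\n", calls);
  return 0;
}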
3937
3938 CMTask::CMTask(int task_id,
3939 ConcurrentMark* cm,
3940 CMTaskQueue* task_queue,
3941 CMTaskQueueSet* task_queues)
3942 : _g1h(G1CollectedHeap::heap()),
3943 _co_tracker(G1CMGroup),
3944 _task_id(task_id), _cm(cm),
3945 _claimed(false),
3946 _nextMarkBitMap(NULL), _hash_seed(17),
3947 _task_queue(task_queue),
3948 _task_queues(task_queues),
3949 _oop_closure(NULL) {
3950 guarantee( task_queue != NULL, "invariant" );
3951 guarantee( task_queues != NULL, "invariant" );
3952
3953 statsOnly( _clock_due_to_scanning = 0;
3954 _clock_due_to_marking = 0 );
3955
3956 _marking_step_diffs_ms.add(0.5);
3957 }