comparison src/share/vm/gc_implementation/g1/concurrentMark.cpp @ 362:f8199438385b

Merge
author apetrusenko
date Wed, 17 Sep 2008 16:49:18 +0400
parents 0edda524b58c
children c96030fff130
316:5fa96a5a7e76 362:f8199438385b
1 /*
2 * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
24
25 #include "incls/_precompiled.incl"
26 #include "incls/_concurrentMark.cpp.incl"
27
28 //
29 // CMS Bit Map Wrapper
30
31 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
32 _bm((uintptr_t*)NULL,0),
33 _shifter(shifter) {
34 _bmStartWord = (HeapWord*)(rs.base());
35 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
36 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
37 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
38
39 guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
40 // For now we'll just commit all of the bit map up front.
41 // Later on we'll try to be more parsimonious with swap.
42 guarantee(_virtual_space.initialize(brs, brs.size()),
43 "couldn't reserve backing store for CMS bit map");
44 assert(_virtual_space.committed_size() == brs.size(),
45 "didn't reserve backing store for all of CMS bit map?");
46 _bm.set_map((uintptr_t*)_virtual_space.low());
47 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
48 _bmWordSize, "inconsistency in bit map sizing");
49 _bm.set_size(_bmWordSize >> _shifter);
50 }
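// Illustrative sketch only (not part of this changeset): the constructor
// above sizes the backing store for a bitmap with one bit per
// (1 << _shifter) heap words. The address <-> bit-offset mapping it
// relies on is, in essence (the real inline helpers are assumed to live
// in concurrentMark.hpp):
//
//   size_t heapWordToOffset(HeapWord* addr) const {
//     return pointer_delta(addr, _bmStartWord) >> _shifter;
//   }
//   HeapWord* offsetToHeapWord(size_t offset) const {
//     return _bmStartWord + (offset << _shifter);
//   }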
51
52 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
53 HeapWord* limit) const {
54 // First we must round addr *up* to a possible object boundary.
55 addr = (HeapWord*)align_size_up((intptr_t)addr,
56 HeapWordSize << _shifter);
57 size_t addrOffset = heapWordToOffset(addr);
58 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
59 size_t limitOffset = heapWordToOffset(limit);
60 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
61 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
62 assert(nextAddr >= addr, "get_next_one postcondition");
63 assert(nextAddr == limit || isMarked(nextAddr),
64 "get_next_one postcondition");
65 return nextAddr;
66 }
67
68 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
69 HeapWord* limit) const {
70 size_t addrOffset = heapWordToOffset(addr);
71 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
72 size_t limitOffset = heapWordToOffset(limit);
73 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
74 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
75 assert(nextAddr >= addr, "get_next_one postcondition");
76 assert(nextAddr == limit || !isMarked(nextAddr),
77 "get_next_one postcondition");
78 return nextAddr;
79 }
80
81 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
82 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
83 return (int) (diff >> _shifter);
84 }
85
86 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
87 HeapWord* left = MAX2(_bmStartWord, mr.start());
88 HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
89 if (right > left) {
90 // Right-open interval [leftOffset, rightOffset).
91 return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
92 } else {
93 return true;
94 }
95 }
96
97 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
98 size_t from_start_index,
99 HeapWord* to_start_word,
100 size_t word_num) {
101 _bm.mostly_disjoint_range_union(from_bitmap,
102 from_start_index,
103 heapWordToOffset(to_start_word),
104 word_num);
105 }
106
107 #ifndef PRODUCT
108 bool CMBitMapRO::covers(ReservedSpace rs) const {
109 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
110 assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize,
111 "size inconsistency");
112 return _bmStartWord == (HeapWord*)(rs.base()) &&
113 _bmWordSize == rs.size()>>LogHeapWordSize;
114 }
115 #endif
116
117 void CMBitMap::clearAll() {
118 _bm.clear();
119 return;
120 }
121
122 void CMBitMap::markRange(MemRegion mr) {
123 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
124 assert(!mr.is_empty(), "unexpected empty region");
125 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
126 ((HeapWord *) mr.end())),
127 "markRange memory region end is not card aligned");
128 // convert address range into offset range
129 _bm.at_put_range(heapWordToOffset(mr.start()),
130 heapWordToOffset(mr.end()), true);
131 }
132
133 void CMBitMap::clearRange(MemRegion mr) {
134 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
135 assert(!mr.is_empty(), "unexpected empty region");
136 // convert address range into offset range
137 _bm.at_put_range(heapWordToOffset(mr.start()),
138 heapWordToOffset(mr.end()), false);
139 }
140
141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
142 HeapWord* end_addr) {
143 HeapWord* start = getNextMarkedWordAddress(addr);
144 start = MIN2(start, end_addr);
145 HeapWord* end = getNextUnmarkedWordAddress(start);
146 end = MIN2(end, end_addr);
147 assert(start <= end, "Consistency check");
148 MemRegion mr(start, end);
149 if (!mr.is_empty()) {
150 clearRange(mr);
151 }
152 return mr;
153 }
154
155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
156 _base(NULL), _cm(cm)
157 #ifdef ASSERT
158 , _drain_in_progress(false)
159 , _drain_in_progress_yields(false)
160 #endif
161 {}
162
163 void CMMarkStack::allocate(size_t size) {
164 _base = NEW_C_HEAP_ARRAY(oop, size);
165 if (_base == NULL)
166 vm_exit_during_initialization("Failed to allocate "
167 "CM mark stack");
168 _index = 0;
169 // QQQQ cast ...
170 _capacity = (jint) size;
171 _oops_do_bound = -1;
172 NOT_PRODUCT(_max_depth = 0);
173 }
174
175 CMMarkStack::~CMMarkStack() {
176 if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
177 }
178
179 void CMMarkStack::par_push(oop ptr) {
180 while (true) {
181 if (isFull()) {
182 _overflow = true;
183 return;
184 }
185 // Otherwise...
186 jint index = _index;
187 jint next_index = index+1;
188 jint res = Atomic::cmpxchg(next_index, &_index, index);
189 if (res == index) {
190 _base[index] = ptr;
191 // Note that we don't maintain this atomically. We could, but it
192 // doesn't seem necessary.
193 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
194 return;
195 }
196 // Otherwise, we need to try again.
197 }
198 }
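// Illustrative sketch only (not in this file): par_push() above uses the
// standard compare-and-swap claim loop. A stand-alone equivalent,
// assuming only HotSpot's Atomic::cmpxchg(jint, volatile jint*, jint):
//
//   static jint claim_slot(volatile jint* top, jint capacity) {
//     while (true) {
//       jint cur = *top;
//       if (cur >= capacity) return -1;                // stack is full
//       if (Atomic::cmpxchg(cur + 1, top, cur) == cur) {
//         return cur;                                  // we own slot 'cur'
//       }
//       // lost the race to another pusher; re-read and retry
//     }
//   }
//
// On a full stack, par_push() above just records _overflow = true and
// returns, leaving the overflow handling to the marking cycle.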
199
200 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
201 while (true) {
202 if (isFull()) {
203 _overflow = true;
204 return;
205 }
206 // Otherwise...
207 jint index = _index;
208 jint next_index = index + n;
209 if (next_index > _capacity) {
210 _overflow = true;
211 return;
212 }
213 jint res = Atomic::cmpxchg(next_index, &_index, index);
214 if (res == index) {
215 for (int i = 0; i < n; i++) {
216 int ind = index + i;
217 assert(ind < _capacity, "By overflow test above.");
218 _base[ind] = ptr_arr[i];
219 }
220 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
221 return;
222 }
223 // Otherwise, we need to try again.
224 }
225 }
226
227
228 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
229 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
230 jint start = _index;
231 jint next_index = start + n;
232 if (next_index > _capacity) {
233 _overflow = true;
234 return;
235 }
236 // Otherwise.
237 _index = next_index;
238 for (int i = 0; i < n; i++) {
239 int ind = start + i;
240 guarantee(ind < _capacity, "By overflow test above.");
241 _base[ind] = ptr_arr[i];
242 }
243 }
244
245
246 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
247 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
248 jint index = _index;
249 if (index == 0) {
250 *n = 0;
251 return false;
252 } else {
253 int k = MIN2(max, index);
254 jint new_ind = index - k;
255 for (int j = 0; j < k; j++) {
256 ptr_arr[j] = _base[new_ind + j];
257 }
258 _index = new_ind;
259 *n = k;
260 return true;
261 }
262 }
263
264
265 CMRegionStack::CMRegionStack() : _base(NULL) {}
266
267 void CMRegionStack::allocate(size_t size) {
268 _base = NEW_C_HEAP_ARRAY(MemRegion, size);
269 if (_base == NULL)
270 vm_exit_during_initialization("Failed to allocate "
271 "CM region mark stack");
272 _index = 0;
273 // QQQQ cast ...
274 _capacity = (jint) size;
275 }
276
277 CMRegionStack::~CMRegionStack() {
278 if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
279 }
280
281 void CMRegionStack::push(MemRegion mr) {
282 assert(mr.word_size() > 0, "Precondition");
283 while (true) {
284 if (isFull()) {
285 _overflow = true;
286 return;
287 }
288 // Otherwise...
289 jint index = _index;
290 jint next_index = index+1;
291 jint res = Atomic::cmpxchg(next_index, &_index, index);
292 if (res == index) {
293 _base[index] = mr;
294 return;
295 }
296 // Otherwise, we need to try again.
297 }
298 }
299
300 MemRegion CMRegionStack::pop() {
301 while (true) {
302 // Otherwise...
303 jint index = _index;
304
305 if (index == 0) {
306 return MemRegion();
307 }
308 jint next_index = index-1;
309 jint res = Atomic::cmpxchg(next_index, &_index, index);
310 if (res == index) {
311 MemRegion mr = _base[next_index];
312 if (mr.start() != NULL) {
313 tmp_guarantee_CM( mr.end() != NULL, "invariant" );
314 tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
315 return mr;
316 } else {
317 // that entry was invalidated... let's skip it
318 tmp_guarantee_CM( mr.end() == NULL, "invariant" );
319 }
320 }
321 // Otherwise, we need to try again.
322 }
323 }
324
325 bool CMRegionStack::invalidate_entries_into_cset() {
326 bool result = false;
327 G1CollectedHeap* g1h = G1CollectedHeap::heap();
328 for (int i = 0; i < _oops_do_bound; ++i) {
329 MemRegion mr = _base[i];
330 if (mr.start() != NULL) {
331 tmp_guarantee_CM( mr.end() != NULL, "invariant");
332 tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
333 HeapRegion* hr = g1h->heap_region_containing(mr.start());
334 tmp_guarantee_CM( hr != NULL, "invariant" );
335 if (hr->in_collection_set()) {
336 // The region points into the collection set
337 _base[i] = MemRegion();
338 result = true;
339 }
340 } else {
341 // that entry was invalidated... let's skip it
342 tmp_guarantee_CM( mr.end() == NULL, "invariant" );
343 }
344 }
345 return result;
346 }
347
348 template<class OopClosureClass>
349 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
350 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
351 || SafepointSynchronize::is_at_safepoint(),
352 "Drain recursion must be yield-safe.");
353 bool res = true;
354 debug_only(_drain_in_progress = true);
355 debug_only(_drain_in_progress_yields = yield_after);
356 while (!isEmpty()) {
357 oop newOop = pop();
358 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
359 assert(newOop->is_oop(), "Expected an oop");
360 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
361 "only grey objects on this stack");
362 // iterate over the oops in this oop, marking and pushing
363 // the ones in CMS generation.
364 newOop->oop_iterate(cl);
365 if (yield_after && _cm->do_yield_check()) {
366 res = false; break;
367 }
368 }
369 debug_only(_drain_in_progress = false);
370 return res;
371 }
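// Illustrative sketch only (hypothetical closure type, not in this file):
// a typical instantiation of the drain() template above.
//
//   GreyObjectClosure cl(...);             // some OopClosure subtype
//   bool finished = _markStack.drain(&cl, nextMarkBitMap(),
//                                    true /* yield_after */);
//   if (!finished) {
//     // drain() stopped early because a yield check fired; the caller
//     // decides whether and when to resume draining.
//   }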
372
373 void CMMarkStack::oops_do(OopClosure* f) {
374 if (_index == 0) return;
375 assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
376 "Bound must be set.");
377 for (int i = 0; i < _oops_do_bound; i++) {
378 f->do_oop(&_base[i]);
379 }
380 _oops_do_bound = -1;
381 }
382
383 bool ConcurrentMark::not_yet_marked(oop obj) const {
384 return (_g1h->is_obj_ill(obj)
385 || (_g1h->is_in_permanent(obj)
386 && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
387 }
388
389 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
390 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
391 #endif // _MSC_VER
392
393 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
394 int max_regions) :
395 _markBitMap1(rs, MinObjAlignment - 1),
396 _markBitMap2(rs, MinObjAlignment - 1),
397
398 _parallel_marking_threads(0),
399 _sleep_factor(0.0),
400 _marking_task_overhead(1.0),
401 _cleanup_sleep_factor(0.0),
402 _cleanup_task_overhead(1.0),
403 _region_bm(max_regions, false /* in_resource_area*/),
404 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
405 CardTableModRefBS::card_shift,
406 false /* in_resource_area*/),
407 _prevMarkBitMap(&_markBitMap1),
408 _nextMarkBitMap(&_markBitMap2),
409 _at_least_one_mark_complete(false),
410
411 _markStack(this),
412 _regionStack(),
413 // _finger set in set_non_marking_state
414
415 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
416 // _active_tasks set in set_non_marking_state
417 // _tasks set inside the constructor
418 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
419 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
420
421 _has_overflown(false),
422 _concurrent(false),
423
424 // _verbose_level set below
425
426 _init_times(),
427 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
428 _cleanup_times(),
429 _total_counting_time(0.0),
430 _total_rs_scrub_time(0.0),
431
432 _parallel_workers(NULL),
433 _cleanup_co_tracker(G1CLGroup)
434 {
435 CMVerboseLevel verbose_level =
436 (CMVerboseLevel) G1MarkingVerboseLevel;
437 if (verbose_level < no_verbose)
438 verbose_level = no_verbose;
439 if (verbose_level > high_verbose)
440 verbose_level = high_verbose;
441 _verbose_level = verbose_level;
442
443 if (verbose_low())
444 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
445 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
446
447 _markStack.allocate(G1CMStackSize);
448 _regionStack.allocate(G1CMRegionStackSize);
449
450 // Create & start a ConcurrentMark thread.
451 if (G1ConcMark) {
452 _cmThread = new ConcurrentMarkThread(this);
453 assert(cmThread() != NULL, "CM Thread should have been created");
454 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
455 } else {
456 _cmThread = NULL;
457 }
458 _g1h = G1CollectedHeap::heap();
459 assert(CGC_lock != NULL, "Where's the CGC_lock?");
460 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
461 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
462
463 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
464 satb_qs.set_buffer_size(G1SATBLogBufferSize);
465
466 int size = (int) MAX2(ParallelGCThreads, (size_t)1);
467 _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
468 for (int i = 0 ; i < size; i++) {
469 _par_cleanup_thread_state[i] = new ParCleanupThreadState;
470 }
471
472 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
473 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
474
475 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
476 _active_tasks = _max_task_num;
477 for (int i = 0; i < (int) _max_task_num; ++i) {
478 CMTaskQueue* task_queue = new CMTaskQueue();
479 task_queue->initialize();
480 _task_queues->register_queue(i, task_queue);
481
482 _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
483 _accum_task_vtime[i] = 0.0;
484 }
485
486 if (ParallelMarkingThreads > ParallelGCThreads) {
487 vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
488 "than ParallelGCThreads.");
489 }
490 if (ParallelGCThreads == 0) {
491 // if we are not running with any parallel GC threads we will not
492 // spawn any marking threads either
493 _parallel_marking_threads = 0;
494 _sleep_factor = 0.0;
495 _marking_task_overhead = 1.0;
496 } else {
497 if (ParallelMarkingThreads > 0) {
498 // notice that ParallelMarkingThreads overrides G1MarkingOverheadPerc
499 // if both are set
500
501 _parallel_marking_threads = ParallelMarkingThreads;
502 _sleep_factor = 0.0;
503 _marking_task_overhead = 1.0;
504 } else if (G1MarkingOverheadPerc > 0) {
505 // we will calculate the number of parallel marking threads
506 // based on a target overhead with respect to the soft real-time
507 // goal
508
509 double marking_overhead = (double) G1MarkingOverheadPerc / 100.0;
510 double overall_cm_overhead =
511 (double) G1MaxPauseTimeMS * marking_overhead / (double) G1TimeSliceMS;
512 double cpu_ratio = 1.0 / (double) os::processor_count();
513 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
514 double marking_task_overhead =
515 overall_cm_overhead / marking_thread_num *
516 (double) os::processor_count();
517 double sleep_factor =
518 (1.0 - marking_task_overhead) / marking_task_overhead;
519
520 _parallel_marking_threads = (size_t) marking_thread_num;
521 _sleep_factor = sleep_factor;
522 _marking_task_overhead = marking_task_overhead;
523 } else {
524 _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
525 _sleep_factor = 0.0;
526 _marking_task_overhead = 1.0;
527 }
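// Worked example with illustrative numbers only (not from this changeset):
// with G1MarkingOverheadPerc = 10, G1MaxPauseTimeMS = 50,
// G1TimeSliceMS = 100 and 8 processors, the G1MarkingOverheadPerc branch
// above computes
//   marking_overhead      = 10 / 100                = 0.10
//   overall_cm_overhead   = 50 * 0.10 / 100         = 0.05
//   cpu_ratio             = 1 / 8                   = 0.125
//   marking_thread_num    = ceil(0.05 / 0.125)      = 1
//   marking_task_overhead = 0.05 / 1 * 8            = 0.40
//   sleep_factor          = (1 - 0.40) / 0.40       = 1.5
// i.e. one marking thread that sleeps 1.5x as long as it marks.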
528
529 if (parallel_marking_threads() > 1)
530 _cleanup_task_overhead = 1.0;
531 else
532 _cleanup_task_overhead = marking_task_overhead();
533 _cleanup_sleep_factor =
534 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
535
536 #if 0
537 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
538 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
539 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
540 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
541 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
542 #endif
543
544 guarantee( parallel_marking_threads() > 0, "peace of mind" );
545 _parallel_workers = new WorkGang("Parallel Marking Threads",
546 (int) parallel_marking_threads(), false, true);
547 if (_parallel_workers == NULL)
548 vm_exit_during_initialization("Failed necessary allocation.");
549 }
550
551 // so that the call below can read a sensible value
552 _heap_start = (HeapWord*) rs.base();
553 set_non_marking_state();
554 }
555
556 void ConcurrentMark::update_g1_committed(bool force) {
557 // If concurrent marking is not in progress, then we do not need to
558 // update _heap_end. This has a subtle and important
559 // side-effect. Imagine that two evacuation pauses happen between
560 // marking completion and remark. The first one can grow the
561 // heap (hence now the finger is below the heap end). Then, the
562 // second one could unnecessarily push regions on the region
563 // stack. This causes the invariant that the region stack is empty
564 // at the beginning of remark to be false. By ensuring that we do
565 // not observe heap expansions after marking is complete, we avoid
566 // this problem.
567 if (!concurrent_marking_in_progress() && !force)
568 return;
569
570 MemRegion committed = _g1h->g1_committed();
571 tmp_guarantee_CM( committed.start() == _heap_start,
572 "start shouldn't change" );
573 HeapWord* new_end = committed.end();
574 if (new_end > _heap_end) {
575 // The heap has been expanded.
576
577 _heap_end = new_end;
578 }
579 // Notice that the heap can also shrink. However, this only happens
580 // during a Full GC (at least currently) and the entire marking
581 // phase will bail out and the task will not be restarted. So, let's
582 // do nothing.
583 }
584
585 void ConcurrentMark::reset() {
586 // Starting values for these two. This should be called in a STW
587 // phase. CM will be notified of any future g1_committed expansions;
588 // these will happen at the end of evacuation pauses, when tasks are
589 // inactive.
590 MemRegion committed = _g1h->g1_committed();
591 _heap_start = committed.start();
592 _heap_end = committed.end();
593
594 guarantee( _heap_start != NULL &&
595 _heap_end != NULL &&
596 _heap_start < _heap_end, "heap bounds should look ok" );
597
598 // reset all the marking data structures and any necessary flags
599 clear_marking_state();
600
601 if (verbose_low())
602 gclog_or_tty->print_cr("[global] resetting");
603
604 // We do reset all of them, since different phases will use
605 // a different number of active threads. So, it's easiest to have all
606 // of them ready.
607 for (int i = 0; i < (int) _max_task_num; ++i)
608 _tasks[i]->reset(_nextMarkBitMap);
609
610 // we need this to make sure that the flag is on during the evac
611 // pause with initial mark piggy-backed
612 set_concurrent_marking_in_progress();
613 }
614
615 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
616 guarantee( active_tasks <= _max_task_num, "we should not have more" );
617
618 _active_tasks = active_tasks;
619 // Need to update the three data structures below according to the
620 // number of active threads for this phase.
621 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
622 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
623 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
624
625 _concurrent = concurrent;
626 // We propagate this to all tasks, not just the active ones.
627 for (int i = 0; i < (int) _max_task_num; ++i)
628 _tasks[i]->set_concurrent(concurrent);
629
630 if (concurrent) {
631 set_concurrent_marking_in_progress();
632 } else {
633 // We currently assume that the concurrent flag has been set to
634 // false before we start remark. At this point we should also be
635 // in a STW phase.
636 guarantee( !concurrent_marking_in_progress(), "invariant" );
637 guarantee( _finger == _heap_end, "only way to get here" );
638 update_g1_committed(true);
639 }
640 }
641
642 void ConcurrentMark::set_non_marking_state() {
643 // We set the global marking state to some default values when we're
644 // not doing marking.
645 clear_marking_state();
646 _active_tasks = 0;
647 clear_concurrent_marking_in_progress();
648 }
649
650 ConcurrentMark::~ConcurrentMark() {
651 int size = (int) MAX2(ParallelGCThreads, (size_t)1);
652 for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
653 FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
654 _par_cleanup_thread_state);
655
656 for (int i = 0; i < (int) _max_task_num; ++i) {
657 delete _task_queues->queue(i);
658 delete _tasks[i];
659 }
660 delete _task_queues;
661 FREE_C_HEAP_ARRAY(CMTask*, _tasks);
662 }
663
664 // This closure is used to mark refs into the g1 generation
665 // from external roots in the CMS bit map.
666 // Called at the first checkpoint.
667 //
668
669 #define PRINT_REACHABLE_AT_INITIAL_MARK 0
670 #if PRINT_REACHABLE_AT_INITIAL_MARK
671 static FILE* reachable_file = NULL;
672
673 class PrintReachableClosure: public OopsInGenClosure {
674 CMBitMap* _bm;
675 int _level;
676 public:
677 PrintReachableClosure(CMBitMap* bm) :
678 _bm(bm), _level(0) {
679 guarantee(reachable_file != NULL, "pre-condition");
680 }
681 void do_oop(oop* p) {
682 oop obj = *p;
683 HeapWord* obj_addr = (HeapWord*)obj;
684 if (obj == NULL) return;
685 fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n",
686 _level, p, (void*) obj, _bm->isMarked(obj_addr));
687 if (!_bm->isMarked(obj_addr)) {
688 _bm->mark(obj_addr);
689 _level++;
690 obj->oop_iterate(this);
691 _level--;
692 }
693 }
694 };
695 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
696
697 #define SEND_HEAP_DUMP_TO_FILE 0
698 #if SEND_HEAP_DUMP_TO_FILE
699 static FILE* heap_dump_file = NULL;
700 #endif // SEND_HEAP_DUMP_TO_FILE
701
702 void ConcurrentMark::clearNextBitmap() {
703 guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");
704
705 // clear the mark bitmap (no grey objects to start with).
706 // We need to do this in chunks and offer to yield in between
707 // each chunk.
708 HeapWord* start = _nextMarkBitMap->startWord();
709 HeapWord* end = _nextMarkBitMap->endWord();
710 HeapWord* cur = start;
711 size_t chunkSize = M;
712 while (cur < end) {
713 HeapWord* next = cur + chunkSize;
714 if (next > end)
715 next = end;
716 MemRegion mr(cur,next);
717 _nextMarkBitMap->clearRange(mr);
718 cur = next;
719 do_yield_check();
720 }
721 }
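// Illustrative numbers only: with chunkSize = M heap words (8 MB of heap
// per chunk on a 64-bit VM), clearing the next bitmap for a 1 GB heap
// takes 128 iterations of the loop above, with a do_yield_check()
// between chunks; each clearRange() call touches on the order of 128 KB
// of bitmap, assuming one bit per heap word.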
722
723 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
724 public:
725 bool doHeapRegion(HeapRegion* r) {
726 if (!r->continuesHumongous()) {
727 r->note_start_of_marking(true);
728 }
729 return false;
730 }
731 };
732
733 void ConcurrentMark::checkpointRootsInitialPre() {
734 G1CollectedHeap* g1h = G1CollectedHeap::heap();
735 G1CollectorPolicy* g1p = g1h->g1_policy();
736
737 _has_aborted = false;
738
739 // Find all the reachable objects...
740 #if PRINT_REACHABLE_AT_INITIAL_MARK
741 guarantee(reachable_file == NULL, "Protocol");
742 char fn_buf[100];
743 sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id());
744 reachable_file = fopen(fn_buf, "w");
745 // clear the mark bitmap (no grey objects to start with)
746 _nextMarkBitMap->clearAll();
747 PrintReachableClosure prcl(_nextMarkBitMap);
748 g1h->process_strong_roots(
749 false, // fake perm gen collection
750 SharedHeap::SO_AllClasses,
751 &prcl, // Regular roots
752 &prcl // Perm Gen Roots
753 );
754 // The root iteration above "consumed" dirty cards in the perm gen.
755 // Therefore, as a shortcut, we dirty all such cards.
756 g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false);
757 fclose(reachable_file);
758 reachable_file = NULL;
759 // clear the mark bitmap again.
760 _nextMarkBitMap->clearAll();
761 COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
762 COMPILER2_PRESENT(DerivedPointerTable::clear());
763 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
764
765 // Initialise marking structures. This has to be done in a STW phase.
766 reset();
767 }
768
769 class CMMarkRootsClosure: public OopsInGenClosure {
770 private:
771 ConcurrentMark* _cm;
772 G1CollectedHeap* _g1h;
773 bool _do_barrier;
774
775 public:
776 CMMarkRootsClosure(ConcurrentMark* cm,
777 G1CollectedHeap* g1h,
778 bool do_barrier) : _cm(cm), _g1h(g1h),
779 _do_barrier(do_barrier) { }
780
781 virtual void do_oop(narrowOop* p) {
782 guarantee(false, "NYI");
783 }
784
785 virtual void do_oop(oop* p) {
786 oop thisOop = *p;
787 if (thisOop != NULL) {
788 assert(thisOop->is_oop() || thisOop->mark() == NULL,
789 "expected an oop, possibly with mark word displaced");
790 HeapWord* addr = (HeapWord*)thisOop;
791 if (_g1h->is_in_g1_reserved(addr)) {
792 _cm->grayRoot(thisOop);
793 }
794 }
795 if (_do_barrier) {
796 assert(!_g1h->is_in_g1_reserved(p),
797 "Should be called on external roots");
798 do_barrier(p);
799 }
800 }
801 };
802
803 void ConcurrentMark::checkpointRootsInitialPost() {
804 G1CollectedHeap* g1h = G1CollectedHeap::heap();
805
806 // For each region note start of marking.
807 NoteStartOfMarkHRClosure startcl;
808 g1h->heap_region_iterate(&startcl);
809
810 // Start weak-reference discovery.
811 ReferenceProcessor* rp = g1h->ref_processor();
812 rp->verify_no_references_recorded();
813 rp->enable_discovery(); // enable ("weak") refs discovery
814
815 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
816 satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
817 satb_mq_set.set_active_all_threads(true);
818
819 // update_g1_committed() will be called at the end of an evac pause
820 // when marking is on. So, it's also called at the end of the
821 // initial-mark pause to update the heap end, if the heap expands
822 // during it. No need to call it here.
823
824 guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
825
826 size_t max_marking_threads =
827 MAX2((size_t) 1, parallel_marking_threads());
828 for (int i = 0; i < (int)_max_task_num; ++i) {
829 _tasks[i]->enable_co_tracker();
830 if (i < (int) max_marking_threads)
831 _tasks[i]->reset_co_tracker(marking_task_overhead());
832 else
833 _tasks[i]->reset_co_tracker(0.0);
834 }
835 }
836
837 // Checkpoint the roots into this generation from outside
838 // this generation. [Note this initial checkpoint need only
839 // be approximate -- we'll do a catch up phase subsequently.]
840 void ConcurrentMark::checkpointRootsInitial() {
841 assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
842 G1CollectedHeap* g1h = G1CollectedHeap::heap();
843
844 double start = os::elapsedTime();
845 GCOverheadReporter::recordSTWStart(start);
846
847 // If there has not been a GC[n-1] since last GC[n] cycle completed,
848 // precede our marking with a collection of all
849 // younger generations to keep floating garbage to a minimum.
850 // YSR: we won't do this for now -- it's an optimization to be
851 // done post-beta.
852
853 // YSR: ignoring weak refs for now; will do at bug fixing stage
854 // EVM: assert(discoveredRefsAreClear());
855
856
857 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
858 g1p->record_concurrent_mark_init_start();
859 checkpointRootsInitialPre();
860
861 // YSR: when concurrent precleaning is in place, we'll
862 // need to clear the cached card table here
863
864 ResourceMark rm;
865 HandleMark hm;
866
867 g1h->ensure_parsability(false);
868 g1h->perm_gen()->save_marks();
869
870 CMMarkRootsClosure notOlder(this, g1h, false);
871 CMMarkRootsClosure older(this, g1h, true);
872
873 g1h->set_marking_started();
874 g1h->rem_set()->prepare_for_younger_refs_iterate(false);
875
876 g1h->process_strong_roots(false, // fake perm gen collection
877 SharedHeap::SO_AllClasses,
878 &notOlder, // Regular roots
879 &older // Perm Gen Roots
880 );
881 checkpointRootsInitialPost();
882
883 // Statistics.
884 double end = os::elapsedTime();
885 _init_times.add((end - start) * 1000.0);
886 GCOverheadReporter::recordSTWEnd(end);
887
888 g1p->record_concurrent_mark_init_end();
889 }
890
891 /*
892 Notice that in the next two methods, we actually leave the STS
893 during the barrier sync and join it immediately afterwards. If we
894 do not do this, then the following deadlock can occur: one
895 thread could be in the barrier sync code, waiting for the other
896 thread to also sync up, whereas another one could be trying to
897 yield, while also waiting for the other threads to sync up too.
898
899 Because the thread that does the sync barrier has left the STS, it
900 is possible for a Full GC or an evacuation pause to occur while it
901 is suspended. This is actually safe, since entering the sync
902 barrier is one of the last things do_marking_step() does, and it
903 doesn't manipulate any data structures afterwards.
904 */
905
906 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
907 if (verbose_low())
908 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
909
910 ConcurrentGCThread::stsLeave();
911 _first_overflow_barrier_sync.enter();
912 ConcurrentGCThread::stsJoin();
913 // at this point everyone should have synced up and not be doing any
914 // more work
915
916 if (verbose_low())
917 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
918
919 // let task 0 do this
920 if (task_num == 0) {
921 // task 0 is responsible for clearing the global data structures
922 clear_marking_state();
923
924 if (PrintGC) {
925 gclog_or_tty->date_stamp(PrintGCDateStamps);
926 gclog_or_tty->stamp(PrintGCTimeStamps);
927 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
928 }
929 }
930
931 // after this, each task should reset its own data structures and
932 // then go into the second barrier
933 }
934
935 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
936 if (verbose_low())
937 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
938
939 ConcurrentGCThread::stsLeave();
940 _second_overflow_barrier_sync.enter();
941 ConcurrentGCThread::stsJoin();
942 // at this point everything should be re-initialised and ready to go
943
944 if (verbose_low())
945 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
946 }
947
948 void ConcurrentMark::grayRoot(oop p) {
949 HeapWord* addr = (HeapWord*) p;
950 // We can't really check against _heap_start and _heap_end, since it
951 // is possible during an evacuation pause with piggy-backed
952 // initial-mark that the committed space is expanded during the
953 // pause without CM observing this change. So the assertion below
954 // is a bit conservative, but better than nothing.
955 tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
956 "address should be within the heap bounds" );
957
958 if (!_nextMarkBitMap->isMarked(addr))
959 _nextMarkBitMap->parMark(addr);
960 }
961
962 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
963 // The objects on the region have already been marked "in bulk" by
964 // the caller. We only need to decide whether to push the region on
965 // the region stack or not.
966
967 if (!concurrent_marking_in_progress() || !_should_gray_objects)
968 // We're done with marking and waiting for remark. We do not need to
969 // push anything else on the region stack.
970 return;
971
972 HeapWord* finger = _finger;
973
974 if (verbose_low())
975 gclog_or_tty->print_cr("[global] attempting to push "
976 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
977 PTR_FORMAT, mr.start(), mr.end(), finger);
978
979 if (mr.start() < finger) {
980 // The finger is always heap region aligned and it is not possible
981 // for mr to span heap regions.
982 tmp_guarantee_CM( mr.end() <= finger, "invariant" );
983
984 tmp_guarantee_CM( mr.start() <= mr.end() &&
985 _heap_start <= mr.start() &&
986 mr.end() <= _heap_end,
987 "region boundaries should fall within the committed space" );
988 if (verbose_low())
989 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
990 "below the finger, pushing it",
991 mr.start(), mr.end());
992
993 if (!region_stack_push(mr)) {
994 if (verbose_low())
995 gclog_or_tty->print_cr("[global] region stack has overflown.");
996 }
997 }
998 }
999
1000 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1001 // The object is not marked by the caller. We need to at least mark
1002 // it and maybe push it on the stack.
1003
1004 HeapWord* addr = (HeapWord*)p;
1005 if (!_nextMarkBitMap->isMarked(addr)) {
1006 // We definitely need to mark it, irrespective of whether we bail out
1007 // because we're done with marking.
1008 if (_nextMarkBitMap->parMark(addr)) {
1009 if (!concurrent_marking_in_progress() || !_should_gray_objects)
1010 // If we're done with concurrent marking and we're waiting for
1011 // remark, then we're not pushing anything on the stack.
1012 return;
1013
1014 // No OrderAccess::store_load() is needed. It is implicit in the
1015 // CAS done in parMark(addr) above
1016 HeapWord* finger = _finger;
1017
1018 if (addr < finger) {
1019 if (!mark_stack_push(oop(addr))) {
1020 if (verbose_low())
1021 gclog_or_tty->print_cr("[global] global stack overflow "
1022 "during parMark");
1023 }
1024 }
1025 }
1026 }
1027 }
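// Illustrative sketch only (hypothetical helper, not in this file): the
// push decision above, like the one in grayRegionIfNecessary(), reduces
// to the object's position relative to the global finger.
//
//   static bool needs_explicit_push(HeapWord* addr, HeapWord* finger) {
//     // Below the finger the claiming scan has already passed, so a
//     // newly greyed object there must be pushed to be traced; at or
//     // above the finger it will be visited when the finger reaches
//     // its region.
//     return addr < finger;
//   }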
1028
1029 class CMConcurrentMarkingTask: public AbstractGangTask {
1030 private:
1031 ConcurrentMark* _cm;
1032 ConcurrentMarkThread* _cmt;
1033
1034 public:
1035 void work(int worker_i) {
1036 guarantee( Thread::current()->is_ConcurrentGC_thread(),
1037 "this should only be done by a conc GC thread" );
1038
1039 double start_vtime = os::elapsedVTime();
1040
1041 ConcurrentGCThread::stsJoin();
1042
1043 guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
1044 CMTask* the_task = _cm->task(worker_i);
1045 the_task->start_co_tracker();
1046 the_task->record_start_time();
1047 if (!_cm->has_aborted()) {
1048 do {
1049 double start_vtime_sec = os::elapsedVTime();
1050 double start_time_sec = os::elapsedTime();
1051 the_task->do_marking_step(10.0);
1052 double end_time_sec = os::elapsedTime();
1053 double end_vtime_sec = os::elapsedVTime();
1054 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1055 double elapsed_time_sec = end_time_sec - start_time_sec;
1056 _cm->clear_has_overflown();
1057
1058 bool ret = _cm->do_yield_check(worker_i);
1059
1060 jlong sleep_time_ms;
1061 if (!_cm->has_aborted() && the_task->has_aborted()) {
1062 sleep_time_ms =
1063 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1064 ConcurrentGCThread::stsLeave();
1065 os::sleep(Thread::current(), sleep_time_ms, false);
1066 ConcurrentGCThread::stsJoin();
1067 }
1068 double end_time2_sec = os::elapsedTime();
1069 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1070
1071 the_task->update_co_tracker();
1072
1073 #if 0
1074 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1075 "overhead %1.4lf",
1076 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1077 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1078 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1079 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1080 #endif
1081 } while (!_cm->has_aborted() && the_task->has_aborted());
1082 }
1083 the_task->record_end_time();
1084 guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );
1085
1086 ConcurrentGCThread::stsLeave();
1087
1088 double end_vtime = os::elapsedVTime();
1089 the_task->update_co_tracker(true);
1090 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1091 }
1092
1093 CMConcurrentMarkingTask(ConcurrentMark* cm,
1094 ConcurrentMarkThread* cmt) :
1095 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1096
1097 ~CMConcurrentMarkingTask() { }
1098 };
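// Worked example with illustrative numbers only: in the work() loop
// above, a marking step that consumed 12 ms of virtual time with a
// sleep factor of 1.5 gives
//   sleep_time_ms = 0.012 * 1.5 * 1000 = 18 ms
// so the worker marks for roughly 12 / (12 + 18) = 40% of its time,
// matching a marking task overhead of 0.40.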
1099
1100 void ConcurrentMark::markFromRoots() {
1101 // we might be tempted to assert that:
1102 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1103 // "inconsistent argument?");
1104 // However that wouldn't be right, because it's possible that
1105 // a safepoint is indeed in progress as a younger generation
1106 // stop-the-world GC happens even as we mark in this generation.
1107
1108 _restart_for_overflow = false;
1109
1110 set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
1111
1112 CMConcurrentMarkingTask markingTask(this, cmThread());
1113 if (parallel_marking_threads() > 0)
1114 _parallel_workers->run_task(&markingTask);
1115 else
1116 markingTask.work(0);
1117 print_stats();
1118 }
1119
1120 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1121 // world is stopped at this checkpoint
1122 assert(SafepointSynchronize::is_at_safepoint(),
1123 "world should be stopped");
1124 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1125
1126 // If a full collection has happened, we shouldn't do this.
1127 if (has_aborted()) {
1128 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1129 return;
1130 }
1131
1132 G1CollectorPolicy* g1p = g1h->g1_policy();
1133 g1p->record_concurrent_mark_remark_start();
1134
1135 double start = os::elapsedTime();
1136 GCOverheadReporter::recordSTWStart(start);
1137
1138 checkpointRootsFinalWork();
1139
1140 double mark_work_end = os::elapsedTime();
1141
1142 weakRefsWork(clear_all_soft_refs);
1143
1144 if (has_overflown()) {
1145 // Oops. We overflowed. Restart concurrent marking.
1146 _restart_for_overflow = true;
1147 // Clear the flag. We do not need it any more.
1148 clear_has_overflown();
1149 if (G1TraceMarkStackOverflow)
1150 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1151 } else {
1152 // We're done with marking.
1153 JavaThread::satb_mark_queue_set().set_active_all_threads(false);
1154 }
1155
1156 #if VERIFY_OBJS_PROCESSED
1157 _scan_obj_cl.objs_processed = 0;
1158 ThreadLocalObjQueue::objs_enqueued = 0;
1159 #endif
1160
1161 // Statistics
1162 double now = os::elapsedTime();
1163 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1164 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1165 _remark_times.add((now - start) * 1000.0);
1166
1167 GCOverheadReporter::recordSTWEnd(now);
1168 for (int i = 0; i < (int)_max_task_num; ++i)
1169 _tasks[i]->disable_co_tracker();
1170 _cleanup_co_tracker.enable();
1171 _cleanup_co_tracker.reset(cleanup_task_overhead());
1172 g1p->record_concurrent_mark_remark_end();
1173 }
1174
1175
1176 #define CARD_BM_TEST_MODE 0
1177
1178 class CalcLiveObjectsClosure: public HeapRegionClosure {
1179
1180 CMBitMapRO* _bm;
1181 ConcurrentMark* _cm;
1182 COTracker* _co_tracker;
1183 bool _changed;
1184 bool _yield;
1185 size_t _words_done;
1186 size_t _tot_live;
1187 size_t _tot_used;
1188 size_t _regions_done;
1189 double _start_vtime_sec;
1190
1191 BitMap* _region_bm;
1192 BitMap* _card_bm;
1193 intptr_t _bottom_card_num;
1194 bool _final;
1195
1196 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1197 for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1198 #if CARD_BM_TEST_MODE
1199 guarantee(_card_bm->at(i - _bottom_card_num),
1200 "Should already be set.");
1201 #else
1202 _card_bm->par_at_put(i - _bottom_card_num, 1);
1203 #endif
1204 }
1205 }
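// Worked example, assuming the usual 512-byte cards (card_shift == 9):
// an object starting at address 0x12345678 has
//   obj_card_num = 0x12345678 >> 9 = 0x91A2B
// and its card table index is obj_card_num - _bottom_card_num, i.e. its
// distance in cards from the bottom of the reserved heap, which is the
// index mark_card_num_range() above uses with _card_bm.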
1206
1207 public:
1208 CalcLiveObjectsClosure(bool final,
1209 CMBitMapRO *bm, ConcurrentMark *cm,
1210 BitMap* region_bm, BitMap* card_bm,
1211 COTracker* co_tracker) :
1212 _bm(bm), _cm(cm), _changed(false), _yield(true),
1213 _words_done(0), _tot_live(0), _tot_used(0),
1214 _region_bm(region_bm), _card_bm(card_bm),
1215 _final(final), _co_tracker(co_tracker),
1216 _regions_done(0), _start_vtime_sec(0.0)
1217 {
1218 _bottom_card_num =
1219 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1220 CardTableModRefBS::card_shift);
1221 }
1222
1223 bool doHeapRegion(HeapRegion* hr) {
1224 if (_co_tracker != NULL)
1225 _co_tracker->update();
1226
1227 if (!_final && _regions_done == 0)
1228 _start_vtime_sec = os::elapsedVTime();
1229
1230 if (hr->continuesHumongous()) return false;
1231
1232 HeapWord* nextTop = hr->next_top_at_mark_start();
1233 HeapWord* start = hr->top_at_conc_mark_count();
1234 assert(hr->bottom() <= start && start <= hr->end() &&
1235 hr->bottom() <= nextTop && nextTop <= hr->end() &&
1236 start <= nextTop,
1237 "Preconditions.");
1238 // Otherwise, record the number of words we'll examine.
1239 size_t words_done = (nextTop - start);
1240 // Find the first marked object at or after "start".
1241 start = _bm->getNextMarkedWordAddress(start, nextTop);
1242 size_t marked_bytes = 0;
1243
1244 // Below, the term "card num" means the result of shifting an address
1245 // by the card shift -- address 0 corresponds to card number 0. One
1246 // must subtract the card num of the bottom of the heap to obtain a
1247 // card table index.
1248 // The first card num of the sequence of live cards currently being
1249 // constructed. -1 ==> no sequence.
1250 intptr_t start_card_num = -1;
1251 // The last card num of the sequence of live cards currently being
1252 // constructed. -1 ==> no sequence.
1253 intptr_t last_card_num = -1;
1254
1255 while (start < nextTop) {
1256 if (_yield && _cm->do_yield_check()) {
1257 // We yielded. It might be for a full collection, in which case
1258 // all bets are off; terminate the traversal.
1259 if (_cm->has_aborted()) {
1260 _changed = false;
1261 return true;
1262 } else {
1263 // Otherwise, it might be a collection pause, and the region
1264 // we're looking at might be in the collection set. We'll
1265 // abandon this region.
1266 return false;
1267 }
1268 }
1269 oop obj = oop(start);
1270 int obj_sz = obj->size();
1271 // The card num of the start of the current object.
1272 intptr_t obj_card_num =
1273 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1274
1275 HeapWord* obj_last = start + obj_sz - 1;
1276 intptr_t obj_last_card_num =
1277 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1278
1279 if (obj_card_num != last_card_num) {
1280 if (start_card_num == -1) {
1281 assert(last_card_num == -1, "Both or neither.");
1282 start_card_num = obj_card_num;
1283 } else {
1284 assert(last_card_num != -1, "Both or neither.");
1285 assert(obj_card_num >= last_card_num, "Inv");
1286 if ((obj_card_num - last_card_num) > 1) {
1287 // Mark the last run, and start a new one.
1288 mark_card_num_range(start_card_num, last_card_num);
1289 start_card_num = obj_card_num;
1290 }
1291 }
1292 #if CARD_BM_TEST_MODE
1293 /*
1294 gclog_or_tty->print_cr("Setting bits from %d/%d.",
1295 obj_card_num - _bottom_card_num,
1296 obj_last_card_num - _bottom_card_num);
1297 */
1298 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1299 _card_bm->par_at_put(j - _bottom_card_num, 1);
1300 }
1301 #endif
1302 }
1303 // In any case, we set the last card num.
1304 last_card_num = obj_last_card_num;
1305
1306 marked_bytes += obj_sz * HeapWordSize;
1307 // Find the next marked object after this one.
1308 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1309 _changed = true;
1310 }
1311 // Handle the last range, if any.
1312 if (start_card_num != -1)
1313 mark_card_num_range(start_card_num, last_card_num);
1314 if (_final) {
1315 // Mark the allocated-since-marking portion...
1316 HeapWord* tp = hr->top();
1317 if (nextTop < tp) {
1318 start_card_num =
1319 intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1320 last_card_num =
1321 intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1322 mark_card_num_range(start_card_num, last_card_num);
1323 // This definitely means the region has live objects.
1324 _region_bm->par_at_put(hr->hrs_index(), 1);
1325 }
1326 }
1327
1328 hr->add_to_marked_bytes(marked_bytes);
1329 // Update the live region bitmap.
1330 if (marked_bytes > 0) {
1331 _region_bm->par_at_put(hr->hrs_index(), 1);
1332 }
1333 hr->set_top_at_conc_mark_count(nextTop);
1334 _tot_live += hr->next_live_bytes();
1335 _tot_used += hr->used();
1336 _words_done = words_done;
1337
1338 if (!_final) {
1339 ++_regions_done;
1340 if (_regions_done % 10 == 0) {
1341 double end_vtime_sec = os::elapsedVTime();
1342 double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1343 if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1344 jlong sleep_time_ms =
1345 (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1346 #if 0
1347 gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, "
1348 "overhead %1.4lf",
1349 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1350 _co_tracker->concOverhead(os::elapsedTime()));
1351 #endif
1352 os::sleep(Thread::current(), sleep_time_ms, false);
1353 _start_vtime_sec = end_vtime_sec;
1354 }
1355 }
1356 }
1357
1358 return false;
1359 }
1360
1361 bool changed() { return _changed; }
1362 void reset() { _changed = false; _words_done = 0; }
1363 void no_yield() { _yield = false; }
1364 size_t words_done() { return _words_done; }
1365 size_t tot_live() { return _tot_live; }
1366 size_t tot_used() { return _tot_used; }
1367 };
1368
1369
1370 void ConcurrentMark::calcDesiredRegions() {
1371 guarantee( _cleanup_co_tracker.enabled(), "invariant" );
1372 _cleanup_co_tracker.start();
1373
1374 _region_bm.clear();
1375 _card_bm.clear();
1376 CalcLiveObjectsClosure calccl(false /*final*/,
1377 nextMarkBitMap(), this,
1378 &_region_bm, &_card_bm,
1379 &_cleanup_co_tracker);
1380 G1CollectedHeap *g1h = G1CollectedHeap::heap();
1381 g1h->heap_region_iterate(&calccl);
1382
1383 do {
1384 calccl.reset();
1385 g1h->heap_region_iterate(&calccl);
1386 } while (calccl.changed());
1387
1388 _cleanup_co_tracker.update(true);
1389 }
1390
1391 class G1ParFinalCountTask: public AbstractGangTask {
1392 protected:
1393 G1CollectedHeap* _g1h;
1394 CMBitMap* _bm;
1395 size_t _n_workers;
1396 size_t *_live_bytes;
1397 size_t *_used_bytes;
1398 BitMap* _region_bm;
1399 BitMap* _card_bm;
1400 public:
1401 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1402 BitMap* region_bm, BitMap* card_bm) :
1403 AbstractGangTask("G1 final counting"), _g1h(g1h),
1404 _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1405 {
1406 if (ParallelGCThreads > 0)
1407 _n_workers = _g1h->workers()->total_workers();
1408 else
1409 _n_workers = 1;
1410 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1411 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1412 }
1413
1414 ~G1ParFinalCountTask() {
1415 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1416 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1417 }
1418
1419 void work(int i) {
1420 CalcLiveObjectsClosure calccl(true /*final*/,
1421 _bm, _g1h->concurrent_mark(),
1422 _region_bm, _card_bm,
1423 NULL /* CO tracker */);
1424 calccl.no_yield();
1425 if (ParallelGCThreads > 0) {
1426 _g1h->heap_region_par_iterate_chunked(&calccl, i,
1427 HeapRegion::FinalCountClaimValue);
1428 } else {
1429 _g1h->heap_region_iterate(&calccl);
1430 }
1431 assert(calccl.complete(), "Shouldn't have yielded!");
1432
1433 guarantee( (size_t)i < _n_workers, "invariant" );
1434 _live_bytes[i] = calccl.tot_live();
1435 _used_bytes[i] = calccl.tot_used();
1436 }
1437 size_t live_bytes() {
1438 size_t live_bytes = 0;
1439 for (size_t i = 0; i < _n_workers; ++i)
1440 live_bytes += _live_bytes[i];
1441 return live_bytes;
1442 }
1443 size_t used_bytes() {
1444 size_t used_bytes = 0;
1445 for (size_t i = 0; i < _n_workers; ++i)
1446 used_bytes += _used_bytes[i];
1447 return used_bytes;
1448 }
1449 };
1450
1451 class G1ParNoteEndTask;
1452
1453 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1454 G1CollectedHeap* _g1;
1455 int _worker_num;
1456 size_t _max_live_bytes;
1457 size_t _regions_claimed;
1458 size_t _freed_bytes;
1459 size_t _cleared_h_regions;
1460 size_t _freed_regions;
1461 UncleanRegionList* _unclean_region_list;
1462 double _claimed_region_time;
1463 double _max_region_time;
1464
1465 public:
1466 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1467 UncleanRegionList* list,
1468 int worker_num);
1469 size_t freed_bytes() { return _freed_bytes; }
1470 size_t cleared_h_regions() { return _cleared_h_regions; }
1471 size_t freed_regions() { return _freed_regions; }
1472 UncleanRegionList* unclean_region_list() {
1473 return _unclean_region_list;
1474 }
1475
1476 bool doHeapRegion(HeapRegion *r);
1477
1478 size_t max_live_bytes() { return _max_live_bytes; }
1479 size_t regions_claimed() { return _regions_claimed; }
1480 double claimed_region_time_sec() { return _claimed_region_time; }
1481 double max_region_time_sec() { return _max_region_time; }
1482 };
1483
1484 class G1ParNoteEndTask: public AbstractGangTask {
1485 friend class G1NoteEndOfConcMarkClosure;
1486 protected:
1487 G1CollectedHeap* _g1h;
1488 size_t _max_live_bytes;
1489 size_t _freed_bytes;
1490 ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
1491 public:
1492 G1ParNoteEndTask(G1CollectedHeap* g1h,
1493 ConcurrentMark::ParCleanupThreadState**
1494 par_cleanup_thread_state) :
1495 AbstractGangTask("G1 note end"), _g1h(g1h),
1496 _max_live_bytes(0), _freed_bytes(0),
1497 _par_cleanup_thread_state(par_cleanup_thread_state)
1498 {}
1499
1500 void work(int i) {
1501 double start = os::elapsedTime();
1502 G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
1503 &_par_cleanup_thread_state[i]->list,
1504 i);
1505 if (ParallelGCThreads > 0) {
1506 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1507 HeapRegion::NoteEndClaimValue);
1508 } else {
1509 _g1h->heap_region_iterate(&g1_note_end);
1510 }
1511 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1512
1513 // Now finish up freeing the current thread's regions.
1514 _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
1515 g1_note_end.cleared_h_regions(),
1516 0, NULL);
1517 {
1518 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1519 _max_live_bytes += g1_note_end.max_live_bytes();
1520 _freed_bytes += g1_note_end.freed_bytes();
1521 }
1522 double end = os::elapsedTime();
1523 if (G1PrintParCleanupStats) {
1524 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1525 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1526 i, start, end, (end-start)*1000.0,
1527 g1_note_end.regions_claimed(),
1528 g1_note_end.claimed_region_time_sec()*1000.0,
1529 g1_note_end.max_region_time_sec()*1000.0);
1530 }
1531 }
1532 size_t max_live_bytes() { return _max_live_bytes; }
1533 size_t freed_bytes() { return _freed_bytes; }
1534 };
1535
1536 class G1ParScrubRemSetTask: public AbstractGangTask {
1537 protected:
1538 G1RemSet* _g1rs;
1539 BitMap* _region_bm;
1540 BitMap* _card_bm;
1541 public:
1542 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1543 BitMap* region_bm, BitMap* card_bm) :
1544 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1545 _region_bm(region_bm), _card_bm(card_bm)
1546 {}
1547
1548 void work(int i) {
1549 if (ParallelGCThreads > 0) {
1550 _g1rs->scrub_par(_region_bm, _card_bm, i,
1551 HeapRegion::ScrubRemSetClaimValue);
1552 } else {
1553 _g1rs->scrub(_region_bm, _card_bm);
1554 }
1555 }
1556
1557 };
1558
1559 G1NoteEndOfConcMarkClosure::
1560 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1561 UncleanRegionList* list,
1562 int worker_num)
1563 : _g1(g1), _worker_num(worker_num),
1564 _max_live_bytes(0), _regions_claimed(0),
1565 _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
1566 _claimed_region_time(0.0), _max_region_time(0.0),
1567 _unclean_region_list(list)
1568 {}
1569
1570 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
1571 // We use a claim value of zero here because all regions
1572 // were claimed with value 1 in the FinalCount task.
1573 r->reset_gc_time_stamp();
1574 if (!r->continuesHumongous()) {
1575 double start = os::elapsedTime();
1576 _regions_claimed++;
1577 r->note_end_of_marking();
1578 _max_live_bytes += r->max_live_bytes();
1579 _g1->free_region_if_totally_empty_work(r,
1580 _freed_bytes,
1581 _cleared_h_regions,
1582 _freed_regions,
1583 _unclean_region_list,
1584 true /*par*/);
1585 double region_time = (os::elapsedTime() - start);
1586 _claimed_region_time += region_time;
1587 if (region_time > _max_region_time) _max_region_time = region_time;
1588 }
1589 return false;
1590 }
1591
1592 void ConcurrentMark::cleanup() {
1593 // world is stopped at this checkpoint
1594 assert(SafepointSynchronize::is_at_safepoint(),
1595 "world should be stopped");
1596 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1597
1598 // If a full collection has happened, we shouldn't do this.
1599 if (has_aborted()) {
1600 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1601 return;
1602 }
1603
1604 _cleanup_co_tracker.disable();
1605
1606 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1607 g1p->record_concurrent_mark_cleanup_start();
1608
1609 double start = os::elapsedTime();
1610 GCOverheadReporter::recordSTWStart(start);
1611
1612 // Do counting once more with the world stopped for good measure.
1613 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1614 &_region_bm, &_card_bm);
1615 if (ParallelGCThreads > 0) {
1616 assert(g1h->check_heap_region_claim_values(
1617 HeapRegion::InitialClaimValue),
1618 "sanity check");
1619
1620 int n_workers = g1h->workers()->total_workers();
1621 g1h->set_par_threads(n_workers);
1622 g1h->workers()->run_task(&g1_par_count_task);
1623 g1h->set_par_threads(0);
1624
1625 assert(g1h->check_heap_region_claim_values(
1626 HeapRegion::FinalCountClaimValue),
1627 "sanity check");
1628 } else {
1629 g1_par_count_task.work(0);
1630 }
1631
1632 size_t known_garbage_bytes =
1633 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1634 #if 0
1635 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1636 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1637 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1638 (double) known_garbage_bytes / (double) (1024 * 1024));
1639 #endif // 0
1640 g1p->set_known_garbage_bytes(known_garbage_bytes);
1641
1642 size_t start_used_bytes = g1h->used();
1643 _at_least_one_mark_complete = true;
1644 g1h->set_marking_complete();
1645
1646 double count_end = os::elapsedTime();
1647 double this_final_counting_time = (count_end - start);
1648 if (G1PrintParCleanupStats) {
1649 gclog_or_tty->print_cr("Cleanup:");
1650 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1651 this_final_counting_time*1000.0);
1652 }
1653 _total_counting_time += this_final_counting_time;
1654
1655 // Install newly created mark bitMap as "prev".
1656 swapMarkBitMaps();
1657
1658 g1h->reset_gc_time_stamp();
1659
1660 // Note end of marking in all heap regions.
1661 double note_end_start = os::elapsedTime();
1662 G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1663 if (ParallelGCThreads > 0) {
1664 int n_workers = g1h->workers()->total_workers();
1665 g1h->set_par_threads(n_workers);
1666 g1h->workers()->run_task(&g1_par_note_end_task);
1667 g1h->set_par_threads(0);
1668
1669 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1670 "sanity check");
1671 } else {
1672 g1_par_note_end_task.work(0);
1673 }
1674 // Tell the mutators that there might be unclean regions coming...
1675 g1h->set_unclean_regions_coming(true);
1676 double note_end_end = os::elapsedTime();
1677 if (G1PrintParCleanupStats) {
1678 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1679 (note_end_end - note_end_start)*1000.0);
1680 }
1681
1682
1683 // Do the remembered set scrubbing before the record_concurrent_mark_cleanup_end()
1684 // call below, since it affects the metric by which we sort the heap regions.
1685 if (G1ScrubRemSets) {
1686 double rs_scrub_start = os::elapsedTime();
1687 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1688 if (ParallelGCThreads > 0) {
1689 int n_workers = g1h->workers()->total_workers();
1690 g1h->set_par_threads(n_workers);
1691 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1692 g1h->set_par_threads(0);
1693
1694 assert(g1h->check_heap_region_claim_values(
1695 HeapRegion::ScrubRemSetClaimValue),
1696 "sanity check");
1697 } else {
1698 g1_par_scrub_rs_task.work(0);
1699 }
1700
1701 double rs_scrub_end = os::elapsedTime();
1702 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1703 _total_rs_scrub_time += this_rs_scrub_time;
1704 }
1705
1706 // this will also free any regions totally full of garbage objects,
1707 // and sort the regions.
1708 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1709 g1_par_note_end_task.freed_bytes(),
1710 g1_par_note_end_task.max_live_bytes());
1711
1712 // Statistics.
1713 double end = os::elapsedTime();
1714 _cleanup_times.add((end - start) * 1000.0);
1715 GCOverheadReporter::recordSTWEnd(end);
1716
1717 // G1CollectedHeap::heap()->print();
1718 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1719 // G1CollectedHeap::heap()->get_gc_time_stamp());
1720
1721 if (PrintGC || PrintGCDetails) {
1722 g1h->print_size_transition(gclog_or_tty,
1723 start_used_bytes,
1724 g1h->used(),
1725 g1h->capacity());
1726 }
1727
1728 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1729 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1730
1731 // We need to make this a "collection" so any collection pause that
1732 // races with it goes around and waits for completeCleanup to finish.
1733 g1h->increment_total_collections();
1734
1735 #ifndef PRODUCT
1736 if (G1VerifyConcMark) {
1737 G1CollectedHeap::heap()->prepare_for_verify();
1738 G1CollectedHeap::heap()->verify(true,false);
1739 }
1740 #endif
1741 }
1742
1743 void ConcurrentMark::completeCleanup() {
1744 // A full collection intervened.
1745 if (has_aborted()) return;
1746
1747 int first = 0;
1748 int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1749 for (int t = 0; t < last; t++) {
1750 UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1751 assert(list->well_formed(), "Inv");
1752 HeapRegion* hd = list->hd();
1753 while (hd != NULL) {
1754 // Now finish up the other stuff.
1755 hd->rem_set()->clear();
1756 HeapRegion* next_hd = hd->next_from_unclean_list();
1757 (void)list->pop();
1758 guarantee(list->hd() == next_hd, "how not?");
1759 _g1h->put_region_on_unclean_list(hd);
1760 if (!hd->isHumongous()) {
1761 // Increment the _free_regions count by one.
1762 _g1h->finish_free_region_work(0, 0, 1, NULL);
1763 }
1764 hd = list->hd();
1765 guarantee(hd == next_hd, "how not?");
1766 }
1767 }
1768 }
1769
1770
1771 class G1CMIsAliveClosure: public BoolObjectClosure {
1772 G1CollectedHeap* _g1;
1773 public:
1774 G1CMIsAliveClosure(G1CollectedHeap* g1) :
1775 _g1(g1)
1776 {}
1777
1778 void do_object(oop obj) {
1779 assert(false, "not to be invoked");
1780 }
1781 bool do_object_b(oop obj) {
1782 HeapWord* addr = (HeapWord*)obj;
1783 return addr != NULL &&
1784 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1785 }
1786 };
1787
1788 class G1CMKeepAliveClosure: public OopClosure {
1789 G1CollectedHeap* _g1;
1790 ConcurrentMark* _cm;
1791 CMBitMap* _bitMap;
1792 public:
1793 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1794 CMBitMap* bitMap) :
1795 _g1(g1), _cm(cm),
1796 _bitMap(bitMap) {}
1797
1798 void do_oop(narrowOop* p) {
1799 guarantee(false, "NYI");
1800 }
1801
1802 void do_oop(oop* p) {
1803 oop thisOop = *p;
1804 HeapWord* addr = (HeapWord*)thisOop;
1805 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1806 _bitMap->mark(addr);
1807 _cm->mark_stack_push(thisOop);
1808 }
1809 }
1810 };
1811
1812 class G1CMDrainMarkingStackClosure: public VoidClosure {
1813 CMMarkStack* _markStack;
1814 CMBitMap* _bitMap;
1815 G1CMKeepAliveClosure* _oopClosure;
1816 public:
1817 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1818 G1CMKeepAliveClosure* oopClosure) :
1819 _bitMap(bitMap),
1820 _markStack(markStack),
1821 _oopClosure(oopClosure)
1822 {}
1823
1824 void do_void() {
1825 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1826 }
1827 };
1828
1829 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1830 ResourceMark rm;
1831 HandleMark hm;
1832 ReferencePolicy* soft_ref_policy;
1833
1834 // Process weak references.
1835 if (clear_all_soft_refs) {
1836 soft_ref_policy = new AlwaysClearPolicy();
1837 } else {
1838 #ifdef COMPILER2
1839 soft_ref_policy = new LRUMaxHeapPolicy();
1840 #else
1841 soft_ref_policy = new LRUCurrentHeapPolicy();
1842 #endif
1843 }
1844 assert(_markStack.isEmpty(), "mark stack should be empty");
1845
1846 G1CollectedHeap* g1 = G1CollectedHeap::heap();
1847 G1CMIsAliveClosure g1IsAliveClosure(g1);
1848
1849 G1CMKeepAliveClosure g1KeepAliveClosure(g1, this, nextMarkBitMap());
1850 G1CMDrainMarkingStackClosure
1851 g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1852 &g1KeepAliveClosure);
1853
1854 // XXXYYY Also: copy the parallel ref processing code from CMS.
1855 ReferenceProcessor* rp = g1->ref_processor();
1856 rp->process_discovered_references(soft_ref_policy,
1857 &g1IsAliveClosure,
1858 &g1KeepAliveClosure,
1859 &g1DrainMarkingStackClosure,
1860 NULL);
1861 assert(_markStack.overflow() || _markStack.isEmpty(),
1862 "mark stack should be empty (unless it overflowed)");
1863 if (_markStack.overflow()) {
1864 set_has_overflown();
1865 }
1866
1867 rp->enqueue_discovered_references();
1868 rp->verify_no_references_recorded();
1869 assert(!rp->discovery_enabled(), "should have been disabled");
1870
1871 // Now clean up stale oops in SymbolTable and StringTable
1872 SymbolTable::unlink(&g1IsAliveClosure);
1873 StringTable::unlink(&g1IsAliveClosure);
1874 }
1875
1876 void ConcurrentMark::swapMarkBitMaps() {
1877 CMBitMapRO* temp = _prevMarkBitMap;
1878 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
1879 _nextMarkBitMap = (CMBitMap*) temp;
1880 }
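A brief, informal illustration of what the swap above achieves (editorial note, not part of the original file): the two bitmaps are double-buffered, so after cleanup the just-completed marking results become the "prev" (authoritative liveness) view and the old bitmap is recycled as "next" for the following cycle.

// Roles before and after swapMarkBitMaps(), assuming marking cycle N just finished:
//   before:  _prevMarkBitMap = results of cycle N-1    _nextMarkBitMap = results of cycle N
//   after:   _prevMarkBitMap = results of cycle N      _nextMarkBitMap = old cycle N-1 bitmap,
//            which is cleared again before cycle N+1 starts marking into it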
1881
1882 class CMRemarkTask: public AbstractGangTask {
1883 private:
1884 ConcurrentMark *_cm;
1885
1886 public:
1887 void work(int worker_i) {
1888 // Since all available tasks are actually started, we should
1889 // only proceed if we're supposed to be active.
1890 if ((size_t)worker_i < _cm->active_tasks()) {
1891 CMTask* task = _cm->task(worker_i);
1892 task->record_start_time();
1893 do {
1894 task->do_marking_step(1000000000.0 /* something very large */);
1895 } while (task->has_aborted() && !_cm->has_overflown());
1896 // If we overflow, then we do not want to restart. We instead
1897 // want to abort remark and do concurrent marking again.
1898 task->record_end_time();
1899 }
1900 }
1901
1902 CMRemarkTask(ConcurrentMark* cm) :
1903 AbstractGangTask("Par Remark"), _cm(cm) { }
1904 };
1905
1906 void ConcurrentMark::checkpointRootsFinalWork() {
1907 ResourceMark rm;
1908 HandleMark hm;
1909 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1910
1911 g1h->ensure_parsability(false);
1912
1913 if (ParallelGCThreads > 0) {
1914 g1h->change_strong_roots_parity();
1915 // this is remark, so we'll use up all available threads
1916 int active_workers = ParallelGCThreads;
1917 set_phase(active_workers, false);
1918
1919 CMRemarkTask remarkTask(this);
1920 // We will start all available threads, even if we decide that the
1921 // active_workers will be fewer. The extra ones will just bail out
1922 // immediately.
1923 int n_workers = g1h->workers()->total_workers();
1924 g1h->set_par_threads(n_workers);
1925 g1h->workers()->run_task(&remarkTask);
1926 g1h->set_par_threads(0);
1927
1928 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1929 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1930 } else {
1931 g1h->change_strong_roots_parity();
1932 // this is remark, so we'll use up all available threads
1933 int active_workers = 1;
1934 set_phase(active_workers, false);
1935
1936 CMRemarkTask remarkTask(this);
1937 // We will start all available threads, even if we decide that the
1938 // active_workers will be fewer. The extra ones will just bail out
1939 // immediately.
1940 remarkTask.work(0);
1941
1942 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1943 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1944 }
1945
1946 print_stats();
1947
1948 if (!restart_for_overflow())
1949 set_non_marking_state();
1950
1951 #if VERIFY_OBJS_PROCESSED
1952 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1953 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1954 _scan_obj_cl.objs_processed,
1955 ThreadLocalObjQueue::objs_enqueued);
1956 guarantee(_scan_obj_cl.objs_processed ==
1957 ThreadLocalObjQueue::objs_enqueued,
1958 "Different number of objs processed and enqueued.");
1959 }
1960 #endif
1961 }
1962
1963 class ReachablePrinterOopClosure: public OopClosure {
1964 private:
1965 G1CollectedHeap* _g1h;
1966 CMBitMapRO* _bitmap;
1967 outputStream* _out;
1968
1969 public:
1970 ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
1971 _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }
1972
1973 void do_oop(narrowOop* p) {
1974 guarantee(false, "NYI");
1975 }
1976
1977 void do_oop(oop* p) {
1978 oop obj = *p;
1979 const char* str = NULL;
1980 const char* str2 = "";
1981
1982 if (!_g1h->is_in_g1_reserved(obj))
1983 str = "outside G1 reserved";
1984 else {
1985 HeapRegion* hr = _g1h->heap_region_containing(obj);
1986 guarantee( hr != NULL, "invariant" );
1987 if (hr->obj_allocated_since_prev_marking(obj)) {
1988 str = "over TAMS";
1989 if (_bitmap->isMarked((HeapWord*) obj))
1990 str2 = " AND MARKED";
1991 } else if (_bitmap->isMarked((HeapWord*) obj))
1992 str = "marked";
1993 else
1994 str = "#### NOT MARKED ####";
1995 }
1996
1997 _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
1998 p, (void*) obj, str, str2);
1999 }
2000 };
2001
2002 class ReachablePrinterClosure: public BitMapClosure {
2003 private:
2004 CMBitMapRO* _bitmap;
2005 outputStream* _out;
2006
2007 public:
2008 ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
2009 _bitmap(bitmap), _out(out) { }
2010
2011 bool do_bit(size_t offset) {
2012 HeapWord* addr = _bitmap->offsetToHeapWord(offset);
2013 ReachablePrinterOopClosure oopCl(_bitmap, _out);
2014
2015 _out->print_cr(" obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
2016 oop(addr)->oop_iterate(&oopCl);
2017 _out->print_cr("");
2018
2019 return true;
2020 }
2021 };
2022
2023 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
2024 private:
2025 CMBitMapRO* _bitmap;
2026 outputStream* _out;
2027
2028 public:
2029 void do_object(oop o) {
2030 ReachablePrinterOopClosure oopCl(_bitmap, _out);
2031
2032 _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o);
2033 o->oop_iterate(&oopCl);
2034 _out->print_cr("");
2035 }
2036
2037 ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
2038 _bitmap(bitmap), _out(out) { }
2039 };
2040
2041 class RegionReachablePrinterClosure : public HeapRegionClosure {
2042 private:
2043 CMBitMapRO* _bitmap;
2044 outputStream* _out;
2045
2046 public:
2047 bool doHeapRegion(HeapRegion* hr) {
2048 HeapWord* b = hr->bottom();
2049 HeapWord* e = hr->end();
2050 HeapWord* t = hr->top();
2051 HeapWord* p = hr->prev_top_at_mark_start();
2052 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2053 "PTAMS: "PTR_FORMAT, b, e, t, p);
2054 _out->print_cr("");
2055
2056 ObjInRegionReachablePrinterClosure ocl(_bitmap, _out);
2057 hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
2058
2059 return false;
2060 }
2061
2062 RegionReachablePrinterClosure(CMBitMapRO* bitmap,
2063 outputStream* out) :
2064 _bitmap(bitmap), _out(out) { }
2065 };
2066
2067 void ConcurrentMark::print_prev_bitmap_reachable() {
2068 outputStream* out = gclog_or_tty;
2069
2070 #if SEND_HEAP_DUMP_TO_FILE
2071 guarantee(heap_dump_file == NULL, "Protocol");
2072 char fn_buf[100];
2073 sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id());
2074 heap_dump_file = fopen(fn_buf, "w");
2075 fileStream fstream(heap_dump_file);
2076 out = &fstream;
2077 #endif // SEND_HEAP_DUMP_TO_FILE
2078
2079 RegionReachablePrinterClosure rcl(_prevMarkBitMap, out);
2080 out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP");
2081 _g1h->heap_region_iterate(&rcl);
2082 out->print_cr("");
2083
2084 ReachablePrinterClosure cl(_prevMarkBitMap, out);
2085 out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP");
2086 _prevMarkBitMap->iterate(&cl);
2087 out->print_cr("");
2088
2089 #if SEND_HEAP_DUMP_TO_FILE
2090 fclose(heap_dump_file);
2091 heap_dump_file = NULL;
2092 #endif // SEND_HEAP_DUMP_TO_FILE
2093 }
2094
2095 // This note is for drainAllSATBBuffers and the code in between.
2096 // In the future we could reuse a task to do this work during an
2097 // evacuation pause (since now tasks are not active and can be claimed
2098 // during an evacuation pause). This was a late change to the code and
2099 // is currently not being taken advantage of.
2100
2101 class CMGlobalObjectClosure : public ObjectClosure {
2102 private:
2103 ConcurrentMark* _cm;
2104
2105 public:
2106 void do_object(oop obj) {
2107 _cm->deal_with_reference(obj);
2108 }
2109
2110 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2111 };
2112
2113 void ConcurrentMark::deal_with_reference(oop obj) {
2114 if (verbose_high())
2115 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2116 (void*) obj);
2117
2118
2119 HeapWord* objAddr = (HeapWord*) obj;
2120 if (_g1h->is_in_g1_reserved(objAddr)) {
2121 tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2122 HeapRegion* hr = _g1h->heap_region_containing(obj);
2123 if (_g1h->is_obj_ill(obj, hr)) {
2124 if (verbose_high())
2125 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2126 "marked", (void*) obj);
2127
2128 // we need to mark it first
2129 if (_nextMarkBitMap->parMark(objAddr)) {
2130 // No OrderAccess::store_load() is needed. It is implicit in the
2131 // CAS done in parMark(objAddr) above
2132 HeapWord* finger = _finger;
2133 if (objAddr < finger) {
2134 if (verbose_high())
2135 gclog_or_tty->print_cr("[global] below the global finger "
2136 "("PTR_FORMAT"), pushing it", finger);
2137 if (!mark_stack_push(obj)) {
2138 if (verbose_low())
2139 gclog_or_tty->print_cr("[global] global stack overflow during "
2140 "deal_with_reference");
2141 }
2142 }
2143 }
2144 }
2145 }
2146 }
2147
2148 void ConcurrentMark::drainAllSATBBuffers() {
2149 CMGlobalObjectClosure oc(this);
2150 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2151 satb_mq_set.set_closure(&oc);
2152
2153 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2154 if (verbose_medium())
2155 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2156 }
2157
2158 // no need to check whether we should do this, as this is only
2159 // called during an evacuation pause
2160 satb_mq_set.iterate_closure_all_threads();
2161
2162 satb_mq_set.set_closure(NULL);
2163 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
2164 }
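For context, a minimal sketch of where the SATB buffers drained above come from. This is an illustrative pre-write barrier only, not the actual HotSpot barrier code; the function and parameter names are made up.

// Illustrative only: record the about-to-be-overwritten referent so that the
// snapshot-at-the-beginning object graph is still fully traversed by marking.
typedef void (*EnqueueFn)(void* old_val);

static void satb_pre_write_barrier_sketch(void** field,
                                          bool marking_in_progress,
                                          EnqueueFn enqueue) {
  void* old_val = *field;            // the old referent, part of the SATB snapshot
  if (marking_in_progress && old_val != NULL) {
    enqueue(old_val);                // lands in a per-thread SATB buffer, which is
  }                                  // later drained by code like drainAllSATBBuffers()
}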
2165
2166 void ConcurrentMark::markPrev(oop p) {
2167 // Note we are overriding the read-only view of the prev map here, via
2168 // the cast.
2169 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2170 }
2171
2172 void ConcurrentMark::clear(oop p) {
2173 assert(p != NULL && p->is_oop(), "expected an oop");
2174 HeapWord* addr = (HeapWord*)p;
2175 assert(addr >= _nextMarkBitMap->startWord() &&
2176 addr < _nextMarkBitMap->endWord(), "in a region");
2177
2178 _nextMarkBitMap->clear(addr);
2179 }
2180
2181 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2182 // Note we are overriding the read-only view of the prev map here, via
2183 // the cast.
2184 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2185 _nextMarkBitMap->clearRange(mr);
2186 }
2187
2188 HeapRegion*
2189 ConcurrentMark::claim_region(int task_num) {
2190 // "checkpoint" the finger
2191 HeapWord* finger = _finger;
2192
2193 // _heap_end will not change underneath our feet; it only changes at
2194 // yield points.
2195 while (finger < _heap_end) {
2196 tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
2197
2198 // is the gap between reading the finger and doing the CAS too long?
2199
2200 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2201 HeapWord* bottom = curr_region->bottom();
2202 HeapWord* end = curr_region->end();
2203 HeapWord* limit = curr_region->next_top_at_mark_start();
2204
2205 if (verbose_low())
2206 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2207 "["PTR_FORMAT", "PTR_FORMAT"), "
2208 "limit = "PTR_FORMAT,
2209 task_num, curr_region, bottom, end, limit);
2210
2211 HeapWord* res =
2212 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2213 if (res == finger) {
2214 // we succeeded
2215
2216 // notice that _finger == end cannot be guaranteed here since
2217 // someone else might have moved the finger even further
2218 guarantee( _finger >= end, "the finger should have moved forward" );
2219
2220 if (verbose_low())
2221 gclog_or_tty->print_cr("[%d] we were successful with region = "
2222 PTR_FORMAT, task_num, curr_region);
2223
2224 if (limit > bottom) {
2225 if (verbose_low())
2226 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2227 "returning it ", task_num, curr_region);
2228 return curr_region;
2229 } else {
2230 tmp_guarantee_CM( limit == bottom,
2231 "the region limit should be at bottom" );
2232 if (verbose_low())
2233 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2234 "returning NULL", task_num, curr_region);
2235 // we return NULL and the caller should try calling
2236 // claim_region() again.
2237 return NULL;
2238 }
2239 } else {
2240 guarantee( _finger > finger, "the finger should have moved forward" );
2241 if (verbose_low())
2242 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2243 "global finger = "PTR_FORMAT", "
2244 "our finger = "PTR_FORMAT,
2245 task_num, _finger, finger);
2246
2247 // read it again
2248 finger = _finger;
2249 }
2250 }
2251
2252 return NULL;
2253 }
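The loop above hands out regions by CAS'ing the global finger forward. Below is a minimal, standalone sketch of the same claim-by-CAS pattern using std::atomic; the class, field, and method names are made up for illustration and are not the HotSpot primitives.

#include <atomic>
#include <cstddef>

struct Region { /* region metadata elided */ };

class RegionClaimer {
  Region* _regions;
  size_t  _num_regions;
  std::atomic<size_t> _cursor;             // plays the role of the global finger
public:
  RegionClaimer(Region* regions, size_t n)
    : _regions(regions), _num_regions(n), _cursor(0) {}

  // Returns the next unclaimed region, or NULL once everything is claimed.
  Region* claim() {
    size_t cur = _cursor.load(std::memory_order_relaxed);
    while (cur < _num_regions) {
      // Try to advance the cursor past the region we want to claim.
      if (_cursor.compare_exchange_weak(cur, cur + 1,
                                        std::memory_order_acq_rel)) {
        return &_regions[cur];             // we won the race for region 'cur'
      }
      // On failure, 'cur' has been reloaded with the current value; retry.
    }
    return NULL;
  }
};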
2254
2255 void ConcurrentMark::oops_do(OopClosure* cl) {
2256 if (_markStack.size() > 0 && verbose_low())
2257 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2258 "size = %d", _markStack.size());
2259 // we first iterate over the contents of the mark stack...
2260 _markStack.oops_do(cl);
2261
2262 for (int i = 0; i < (int)_max_task_num; ++i) {
2263 OopTaskQueue* queue = _task_queues->queue((int)i);
2264
2265 if (queue->size() > 0 && verbose_low())
2266 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2267 "size = %d", i, queue->size());
2268
2269 // ...then over the contents of all the task queues.
2270 queue->oops_do(cl);
2271 }
2272
2273 // finally, invalidate any entries in the region stack that
2274 // point into the collection set
2275 if (_regionStack.invalidate_entries_into_cset()) {
2276 // otherwise, any gray objects copied during the evacuation pause
2277 // might not be visited.
2278 guarantee( _should_gray_objects, "invariant" );
2279 }
2280 }
2281
2282 void ConcurrentMark::clear_marking_state() {
2283 _markStack.setEmpty();
2284 _markStack.clear_overflow();
2285 _regionStack.setEmpty();
2286 _regionStack.clear_overflow();
2287 clear_has_overflown();
2288 _finger = _heap_start;
2289
2290 for (int i = 0; i < (int)_max_task_num; ++i) {
2291 OopTaskQueue* queue = _task_queues->queue(i);
2292 queue->set_empty();
2293 }
2294 }
2295
2296 void ConcurrentMark::print_stats() {
2297 if (verbose_stats()) {
2298 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2299 for (size_t i = 0; i < _active_tasks; ++i) {
2300 _tasks[i]->print_stats();
2301 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2302 }
2303 }
2304 }
2305
2306 class CSMarkOopClosure: public OopClosure {
2307 friend class CSMarkBitMapClosure;
2308
2309 G1CollectedHeap* _g1h;
2310 CMBitMap* _bm;
2311 ConcurrentMark* _cm;
2312 oop* _ms;
2313 jint* _array_ind_stack;
2314 int _ms_size;
2315 int _ms_ind;
2316 int _array_increment;
2317
2318 bool push(oop obj, int arr_ind = 0) {
2319 if (_ms_ind == _ms_size) {
2320 gclog_or_tty->print_cr("Mark stack is full.");
2321 return false;
2322 }
2323 _ms[_ms_ind] = obj;
2324 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2325 _ms_ind++;
2326 return true;
2327 }
2328
2329 oop pop() {
2330 if (_ms_ind == 0) return NULL;
2331 else {
2332 _ms_ind--;
2333 return _ms[_ms_ind];
2334 }
2335 }
2336
2337 bool drain() {
2338 while (_ms_ind > 0) {
2339 oop obj = pop();
2340 assert(obj != NULL, "Since index was non-zero.");
2341 if (obj->is_objArray()) {
2342 jint arr_ind = _array_ind_stack[_ms_ind];
2343 objArrayOop aobj = objArrayOop(obj);
2344 jint len = aobj->length();
2345 jint next_arr_ind = arr_ind + _array_increment;
2346 if (next_arr_ind < len) {
2347 push(obj, next_arr_ind);
2348 }
2349 // Now process this portion of the array.
2350 int lim = MIN2(next_arr_ind, len);
2351 assert(!UseCompressedOops, "This needs to be fixed");
2352 for (int j = arr_ind; j < lim; j++) {
2353 do_oop(aobj->obj_at_addr<oop>(j));
2354 }
2355
2356 } else {
2357 obj->oop_iterate(this);
2358 }
2359 if (abort()) return false;
2360 }
2361 return true;
2362 }
2363
2364 public:
2365 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2366 _g1h(G1CollectedHeap::heap()),
2367 _cm(cm),
2368 _bm(cm->nextMarkBitMap()),
2369 _ms_size(ms_size), _ms_ind(0),
2370 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2371 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2372 _array_increment(MAX2(ms_size/8, 16))
2373 {}
2374
2375 ~CSMarkOopClosure() {
2376 FREE_C_HEAP_ARRAY(oop, _ms);
2377 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2378 }
2379
2380 void do_oop(narrowOop* p) {
2381 guarantee(false, "NYI");
2382 }
2383
2384 void do_oop(oop* p) {
2385 oop obj = *p;
2386 if (obj == NULL) return;
2387 if (obj->is_forwarded()) {
2388 // If the object has already been forwarded, we have to make sure
2389 // that it's marked. So follow the forwarding pointer. Note that
2390 // this does the right thing for self-forwarding pointers in the
2391 // evacuation failure case.
2392 obj = obj->forwardee();
2393 }
2394 HeapRegion* hr = _g1h->heap_region_containing(obj);
2395 if (hr != NULL) {
2396 if (hr->in_collection_set()) {
2397 if (_g1h->is_obj_ill(obj)) {
2398 _bm->mark((HeapWord*)obj);
2399 if (!push(obj)) {
2400 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2401 set_abort();
2402 }
2403 }
2404 } else {
2405 // Outside the collection set; we need to gray it
2406 _cm->deal_with_reference(obj);
2407 }
2408 }
2409 }
2410 };
2411
2412 class CSMarkBitMapClosure: public BitMapClosure {
2413 G1CollectedHeap* _g1h;
2414 CMBitMap* _bitMap;
2415 ConcurrentMark* _cm;
2416 CSMarkOopClosure _oop_cl;
2417 public:
2418 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2419 _g1h(G1CollectedHeap::heap()),
2420 _bitMap(cm->nextMarkBitMap()),
2421 _oop_cl(cm, ms_size)
2422 {}
2423
2424 ~CSMarkBitMapClosure() {}
2425
2426 bool do_bit(size_t offset) {
2427 // convert offset into a HeapWord*
2428 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2429 assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(),
2430 "address out of range");
2431 assert(_bitMap->isMarked(addr), "tautology");
2432 oop obj = oop(addr);
2433 if (!obj->is_forwarded()) {
2434 if (!_oop_cl.push(obj)) return false;
2435 if (!_oop_cl.drain()) return false;
2436 }
2437 // Otherwise...
2438 return true;
2439 }
2440 };
2441
2442
2443 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2444 CMBitMap* _bm;
2445 CSMarkBitMapClosure _bit_cl;
2446 enum SomePrivateConstants {
2447 MSSize = 1000
2448 };
2449 bool _completed;
2450 public:
2451 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2452 _bm(cm->nextMarkBitMap()),
2453 _bit_cl(cm, MSSize),
2454 _completed(true)
2455 {}
2456
2457 ~CompleteMarkingInCSHRClosure() {}
2458
2459 bool doHeapRegion(HeapRegion* r) {
2460 if (!r->evacuation_failed()) {
2461 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2462 if (!mr.is_empty()) {
2463 if (!_bm->iterate(&_bit_cl, mr)) {
2464 _completed = false;
2465 return true;
2466 }
2467 }
2468 }
2469 return false;
2470 }
2471
2472 bool completed() { return _completed; }
2473 };
2474
2475 class ClearMarksInHRClosure: public HeapRegionClosure {
2476 CMBitMap* _bm;
2477 public:
2478 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2479
2480 bool doHeapRegion(HeapRegion* r) {
2481 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2482 MemRegion usedMR = r->used_region();
2483 _bm->clearRange(usedMR);
2484 }
2485 return false;
2486 }
2487 };
2488
2489 void ConcurrentMark::complete_marking_in_collection_set() {
2490 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2491
2492 if (!g1h->mark_in_progress()) {
2493 g1h->g1_policy()->record_mark_closure_time(0.0);
2494 return;
2495 }
2496
2497 int i = 1;
2498 double start = os::elapsedTime();
2499 while (true) {
2500 i++;
2501 CompleteMarkingInCSHRClosure cmplt(this);
2502 g1h->collection_set_iterate(&cmplt);
2503 if (cmplt.completed()) break;
2504 }
2505 double end_time = os::elapsedTime();
2506 double elapsed_time_ms = (end_time - start) * 1000.0;
2507 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2508 if (PrintGCDetails) {
2509 gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
2510 }
2511
2512 ClearMarksInHRClosure clr(nextMarkBitMap());
2513 g1h->collection_set_iterate(&clr);
2514 }
2515
2516 // The next two methods deal with the following optimisation. Some
2517 // objects are gray by being marked and located above the finger. If
2518 // they are copied, during an evacuation pause, below the finger then
2519 // they need to be pushed on the stack. The observation is that, if
2520 // there are no regions in the collection set located above the
2521 // finger, then the above cannot happen, hence we do not need to
2522 // explicitly gray any objects when copying them to below the
2523 // finger. The global stack will be scanned to ensure that, if it
2524 // points to objects being copied, it will update their
2525 // location. There is a tricky situation with the gray objects in
2526 // region stack that are being copied, however. See the comment in
2527 // newCSet().
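An informal picture of what "gray by being above the finger" means in the comment above (editorial illustration; addresses grow to the right):

//   bottom                      finger                               top
//     |----------------------------|----------------------------------|
//       already swept by the          a bit set here means the object is
//       bitmap iteration              marked but not yet scanned (gray);
//                                     the iteration will reach it later,
//                                     unless it is evacuated below the
//                                     finger first -- hence the pushes.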
2528
2529 void ConcurrentMark::newCSet() {
2530 if (!concurrent_marking_in_progress())
2531 // nothing to do if marking is not in progress
2532 return;
2533
2534 // find what the lowest finger is among the global and local fingers
2535 _min_finger = _finger;
2536 for (int i = 0; i < (int)_max_task_num; ++i) {
2537 CMTask* task = _tasks[i];
2538 HeapWord* task_finger = task->finger();
2539 if (task_finger != NULL && task_finger < _min_finger)
2540 _min_finger = task_finger;
2541 }
2542
2543 _should_gray_objects = false;
2544
2545 // This fixes a very subtle and frustrating bug. It might be the case
2546 // that, during an evacuation pause, heap regions that contain
2547 // objects that are gray (by being in regions contained in the
2548 // region stack) are included in the collection set. Since such gray
2549 // objects will be moved, and because it's not easy to redirect
2550 // region stack entries to point to a new location (because objects
2551 // in one region might be scattered to multiple regions after they
2552 // are copied), one option is to ensure that all marked objects
2553 // copied during a pause are pushed on the stack. Notice, however,
2554 // that this problem can only happen when the region stack is not
2555 // empty during an evacuation pause. So, we make the fix a bit less
2556 // conservative and ensure that regions are pushed on the stack,
2557 // irrespective of whether all collection set regions are below the
2558 // finger, if the region stack is not empty. This is expected to be
2559 // a rare case, so I don't think it's necessary to be smarter about it.
2560 if (!region_stack_empty())
2561 _should_gray_objects = true;
2562 }
2563
2564 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2565 if (!concurrent_marking_in_progress())
2566 return;
2567
2568 HeapWord* region_end = hr->end();
2569 if (region_end > _min_finger)
2570 _should_gray_objects = true;
2571 }
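A concrete example of the check above may help; the addresses are made up for illustration.

// Suppose _min_finger (the lowest of the global and per-task fingers) is 0xc8000000.
//   CSet region A = [0xc4000000, 0xc4100000): end() <= _min_finger, nothing to do --
//     anything marked in A has already been scanned (it lies below every finger).
//   CSet region B = [0xc9000000, 0xc9100000): end() >  _min_finger, so
//     _should_gray_objects is set -- B may hold objects that are marked but not
//     yet scanned, and those must be grayed explicitly when they are evacuated.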
2572
2573 void ConcurrentMark::disable_co_trackers() {
2574 if (has_aborted()) {
2575 if (_cleanup_co_tracker.enabled())
2576 _cleanup_co_tracker.disable();
2577 for (int i = 0; i < (int)_max_task_num; ++i) {
2578 CMTask* task = _tasks[i];
2579 if (task->co_tracker_enabled())
2580 task->disable_co_tracker();
2581 }
2582 } else {
2583 guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
2584 for (int i = 0; i < (int)_max_task_num; ++i) {
2585 CMTask* task = _tasks[i];
2586 guarantee( !task->co_tracker_enabled(), "invariant" );
2587 }
2588 }
2589 }
2590
2591 // abandon current marking iteration due to a Full GC
2592 void ConcurrentMark::abort() {
2593 // If we're not marking, nothing to do.
2594 if (!G1ConcMark) return;
2595
2596 // Clear all marks to force marking thread to do nothing
2597 _nextMarkBitMap->clearAll();
2598 // Empty mark stack
2599 clear_marking_state();
2600 for (int i = 0; i < (int)_max_task_num; ++i)
2601 _tasks[i]->clear_region_fields();
2602 _has_aborted = true;
2603
2604 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2605 satb_mq_set.abandon_partial_marking();
2606 satb_mq_set.set_active_all_threads(false);
2607 }
2608
2609 static void print_ms_time_info(const char* prefix, const char* name,
2610 NumberSeq& ns) {
2611 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2612 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2613 if (ns.num() > 0) {
2614 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2615 prefix, ns.sd(), ns.maximum());
2616 }
2617 }
2618
2619 void ConcurrentMark::print_summary_info() {
2620 gclog_or_tty->print_cr(" Concurrent marking:");
2621 print_ms_time_info(" ", "init marks", _init_times);
2622 print_ms_time_info(" ", "remarks", _remark_times);
2623 {
2624 print_ms_time_info(" ", "final marks", _remark_mark_times);
2625 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2626
2627 }
2628 print_ms_time_info(" ", "cleanups", _cleanup_times);
2629 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
2630 _total_counting_time,
2631 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2632 (double)_cleanup_times.num()
2633 : 0.0));
2634 if (G1ScrubRemSets) {
2635 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
2636 _total_rs_scrub_time,
2637 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2638 (double)_cleanup_times.num()
2639 : 0.0));
2640 }
2641 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
2642 (_init_times.sum() + _remark_times.sum() +
2643 _cleanup_times.sum())/1000.0);
2644 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
2645 "(%8.2f s marking, %8.2f s counting).",
2646 cmThread()->vtime_accum(),
2647 cmThread()->vtime_mark_accum(),
2648 cmThread()->vtime_count_accum());
2649 }
2650
2651 // Closures
2652 // XXX: there seems to be a lot of code duplication here;
2653 // should refactor and consolidate the shared code.
2654
2655 // This closure is used to mark refs into the CMS generation in
2656 // the CMS bit map. Called at the first checkpoint.
2657
2658 // We take a break if someone is trying to stop the world.
2659 bool ConcurrentMark::do_yield_check(int worker_i) {
2660 if (should_yield()) {
2661 if (worker_i == 0)
2662 _g1h->g1_policy()->record_concurrent_pause();
2663 cmThread()->yield();
2664 if (worker_i == 0)
2665 _g1h->g1_policy()->record_concurrent_pause_end();
2666 return true;
2667 } else {
2668 return false;
2669 }
2670 }
2671
2672 bool ConcurrentMark::should_yield() {
2673 return cmThread()->should_yield();
2674 }
2675
2676 bool ConcurrentMark::containing_card_is_marked(void* p) {
2677 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2678 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2679 }
2680
2681 bool ConcurrentMark::containing_cards_are_marked(void* start,
2682 void* last) {
2683 return
2684 containing_card_is_marked(start) &&
2685 containing_card_is_marked(last);
2686 }
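A worked example of the card arithmetic above, assuming the usual 512-byte cards (i.e. CardTableModRefBS::card_shift == 9); the addresses are made up.

//   reserved_region().start() = 0xc0000000
//   p                         = 0xc0012345
//   byte offset               = 0x12345   (74565)
//   card index                = 0x12345 >> 9 = 145
// containing_card_is_marked(p) therefore just tests bit 145 of _card_bm.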
2687
2688 #ifndef PRODUCT
2689 // for debugging purposes
2690 void ConcurrentMark::print_finger() {
2691 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2692 _heap_start, _heap_end, _finger);
2693 for (int i = 0; i < (int) _max_task_num; ++i) {
2694 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
2695 }
2696 gclog_or_tty->print_cr("");
2697 }
2698 #endif
2699
2700 // Closure for iteration over bitmaps
2701 class CMBitMapClosure : public BitMapClosure {
2702 private:
2703 // the bitmap that is being iterated over
2704 CMBitMap* _nextMarkBitMap;
2705 ConcurrentMark* _cm;
2706 CMTask* _task;
2707 // true if we're scanning a heap region claimed by the task (so that
2708 // we move the finger along), false if we're not, i.e. currently when
2709 // scanning a heap region popped from the region stack (so that we
2710 // do not move the task finger along; it'd be a mistake if we did so).
2711 bool _scanning_heap_region;
2712
2713 public:
2714 CMBitMapClosure(CMTask *task,
2715 ConcurrentMark* cm,
2716 CMBitMap* nextMarkBitMap)
2717 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2718
2719 void set_scanning_heap_region(bool scanning_heap_region) {
2720 _scanning_heap_region = scanning_heap_region;
2721 }
2722
2723 bool do_bit(size_t offset) {
2724 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2725 tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
2726 tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
2727
2728 if (_scanning_heap_region) {
2729 statsOnly( _task->increase_objs_found_on_bitmap() );
2730 tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
2731 // We move that task's local finger along.
2732 _task->move_finger_to(addr);
2733 } else {
2734 // We move the task's region finger along.
2735 _task->move_region_finger_to(addr);
2736 }
2737
2738 _task->scan_object(oop(addr));
2739 // we only partially drain the local queue and global stack
2740 _task->drain_local_queue(true);
2741 _task->drain_global_stack(true);
2742
2743 // if the has_aborted flag has been raised, we need to bail out of
2744 // the iteration
2745 return !_task->has_aborted();
2746 }
2747 };
2748
2749 // Closure for iterating over objects, currently only used for
2750 // processing SATB buffers.
2751 class CMObjectClosure : public ObjectClosure {
2752 private:
2753 CMTask* _task;
2754
2755 public:
2756 void do_object(oop obj) {
2757 _task->deal_with_reference(obj);
2758 }
2759
2760 CMObjectClosure(CMTask* task) : _task(task) { }
2761 };
2762
2763 // Closure for iterating over object fields
2764 class CMOopClosure : public OopClosure {
2765 private:
2766 G1CollectedHeap* _g1h;
2767 ConcurrentMark* _cm;
2768 CMTask* _task;
2769
2770 public:
2771 void do_oop(narrowOop* p) {
2772 guarantee(false, "NYI");
2773 }
2774
2775 void do_oop(oop* p) {
2776 tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
2777
2778 oop obj = *p;
2779 if (_cm->verbose_high())
2780 gclog_or_tty->print_cr("[%d] we're looking at location "
2781 "*"PTR_FORMAT" = "PTR_FORMAT,
2782 _task->task_id(), p, (void*) obj);
2783 _task->deal_with_reference(obj);
2784 }
2785
2786 CMOopClosure(G1CollectedHeap* g1h,
2787 ConcurrentMark* cm,
2788 CMTask* task)
2789 : _g1h(g1h), _cm(cm), _task(task) { }
2790 };
2791
2792 void CMTask::setup_for_region(HeapRegion* hr) {
2793 tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
2794 "claim_region() should have filtered out continues humongous regions" );
2795
2796 if (_cm->verbose_low())
2797 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2798 _task_id, hr);
2799
2800 _curr_region = hr;
2801 _finger = hr->bottom();
2802 update_region_limit();
2803 }
2804
2805 void CMTask::update_region_limit() {
2806 HeapRegion* hr = _curr_region;
2807 HeapWord* bottom = hr->bottom();
2808 HeapWord* limit = hr->next_top_at_mark_start();
2809
2810 if (limit == bottom) {
2811 if (_cm->verbose_low())
2812 gclog_or_tty->print_cr("[%d] found an empty region "
2813 "["PTR_FORMAT", "PTR_FORMAT")",
2814 _task_id, bottom, limit);
2815 // The region was collected underneath our feet.
2816 // We set the finger to bottom to ensure that the bitmap
2817 // iteration that will follow this will not do anything.
2818 // (this is not a condition that holds when we set the region up,
2819 // as the region is not supposed to be empty in the first place)
2820 _finger = bottom;
2821 } else if (limit >= _region_limit) {
2822 tmp_guarantee_CM( limit >= _finger, "peace of mind" );
2823 } else {
2824 tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
2825 // This can happen under some pretty unusual circumstances. An
2826 // evacuation pause empties the region underneath our feet (NTAMS
2827 // at bottom). We then do some allocation in the region (NTAMS
2828 // stays at bottom), followed by the region being used as a GC
2829 // alloc region (NTAMS will move to top() and the objects
2830 // originally below it will be grayed). All objects now marked in
2831 // the region are explicitly grayed, if below the global finger,
2832 // and we do not need in fact to scan anything else. So, we simply
2833 // set _finger to be limit to ensure that the bitmap iteration
2834 // doesn't do anything.
2835 _finger = limit;
2836 }
2837
2838 _region_limit = limit;
2839 }
2840
2841 void CMTask::giveup_current_region() {
2842 tmp_guarantee_CM( _curr_region != NULL, "invariant" );
2843 if (_cm->verbose_low())
2844 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2845 _task_id, _curr_region);
2846 clear_region_fields();
2847 }
2848
2849 void CMTask::clear_region_fields() {
2850 // Values for these three fields that indicate that we're not
2851 // holding on to a region.
2852 _curr_region = NULL;
2853 _finger = NULL;
2854 _region_limit = NULL;
2855
2856 _region_finger = NULL;
2857 }
2858
2859 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2860 guarantee( nextMarkBitMap != NULL, "invariant" );
2861
2862 if (_cm->verbose_low())
2863 gclog_or_tty->print_cr("[%d] resetting", _task_id);
2864
2865 _nextMarkBitMap = nextMarkBitMap;
2866 clear_region_fields();
2867
2868 _calls = 0;
2869 _elapsed_time_ms = 0.0;
2870 _termination_time_ms = 0.0;
2871 _termination_start_time_ms = 0.0;
2872
2873 #if _MARKING_STATS_
2874 _local_pushes = 0;
2875 _local_pops = 0;
2876 _local_max_size = 0;
2877 _objs_scanned = 0;
2878 _global_pushes = 0;
2879 _global_pops = 0;
2880 _global_max_size = 0;
2881 _global_transfers_to = 0;
2882 _global_transfers_from = 0;
2883 _region_stack_pops = 0;
2884 _regions_claimed = 0;
2885 _objs_found_on_bitmap = 0;
2886 _satb_buffers_processed = 0;
2887 _steal_attempts = 0;
2888 _steals = 0;
2889 _aborted = 0;
2890 _aborted_overflow = 0;
2891 _aborted_cm_aborted = 0;
2892 _aborted_yield = 0;
2893 _aborted_timed_out = 0;
2894 _aborted_satb = 0;
2895 _aborted_termination = 0;
2896 #endif // _MARKING_STATS_
2897 }
2898
2899 bool CMTask::should_exit_termination() {
2900 regular_clock_call();
2901 // This is called when we are in the termination protocol. We should
2902 // quit if, for some reason, this task wants to abort or the global
2903 // stack is not empty (this means that we can get work from it).
2904 return !_cm->mark_stack_empty() || has_aborted();
2905 }
2906
2907 // This determines whether the method below will check both the local
2908 // and global fingers when determining whether to push on the stack a
2909 // gray object (value 1) or whether it will only check the global one
2910 // (value 0). The tradeoffs are that the former will be a bit more
2911 // accurate and possibly push fewer entries on the stack, but it might also be
2912 // a little bit slower.
2913
2914 #define _CHECK_BOTH_FINGERS_ 1
2915
2916 void CMTask::deal_with_reference(oop obj) {
2917 if (_cm->verbose_high())
2918 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
2919 _task_id, (void*) obj);
2920
2921 ++_refs_reached;
2922
2923 HeapWord* objAddr = (HeapWord*) obj;
2924 if (_g1h->is_in_g1_reserved(objAddr)) {
2925 tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2926 HeapRegion* hr = _g1h->heap_region_containing(obj);
2927 if (_g1h->is_obj_ill(obj, hr)) {
2928 if (_cm->verbose_high())
2929 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
2930 _task_id, (void*) obj);
2931
2932 // we need to mark it first
2933 if (_nextMarkBitMap->parMark(objAddr)) {
2934 // No OrderAccess::store_load() is needed. It is implicit in the
2935 // CAS done in parMark(objAddr) above
2936 HeapWord* global_finger = _cm->finger();
2937
2938 #if _CHECK_BOTH_FINGERS_
2939 // we will check both the local and global fingers
2940
2941 if (_finger != NULL && objAddr < _finger) {
2942 if (_cm->verbose_high())
2943 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
2944 "pushing it", _task_id, _finger);
2945 push(obj);
2946 } else if (_curr_region != NULL && objAddr < _region_limit) {
2947 // do nothing
2948 } else if (objAddr < global_finger) {
2949 // Notice that the global finger might be moving forward
2950 // concurrently. This is not a problem. In the worst case, we
2951 // mark the object while it is above the global finger and, by
2952 // the time we read the global finger, it has moved forward
2953 // past this object. In this case, the object will probably
2954 // be visited when a task is scanning the region and will also
2955 // be pushed on the stack. So, some duplicate work, but no
2956 // correctness problems.
2957
2958 if (_cm->verbose_high())
2959 gclog_or_tty->print_cr("[%d] below the global finger "
2960 "("PTR_FORMAT"), pushing it",
2961 _task_id, global_finger);
2962 push(obj);
2963 } else {
2964 // do nothing
2965 }
2966 #else // _CHECK_BOTH_FINGERS_
2967 // we will only check the global finger
2968
2969 if (objAddr < global_finger) {
2970 // see long comment above
2971
2972 if (_cm->verbose_high())
2973 gclog_or_tty->print_cr("[%d] below the global finger "
2974 "("PTR_FORMAT"), pushing it",
2975 _task_id, global_finger);
2976 push(obj);
2977 }
2978 #endif // _CHECK_BOTH_FINGERS_
2979 }
2980 }
2981 }
2982 }
2983
2984 void CMTask::push(oop obj) {
2985 HeapWord* objAddr = (HeapWord*) obj;
2986 tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
2987 tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
2988 tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
2989
2990 if (_cm->verbose_high())
2991 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
2992
2993 if (!_task_queue->push(obj)) {
2994 // The local task queue looks full. We need to push some entries
2995 // to the global stack.
2996
2997 if (_cm->verbose_medium())
2998 gclog_or_tty->print_cr("[%d] task queue overflow, "
2999 "moving entries to the global stack",
3000 _task_id);
3001 move_entries_to_global_stack();
3002
3003 // this should succeed since, even if we overflow the global
3004 // stack, we should have definitely removed some entries from the
3005 // local queue. So, there must be space on it.
3006 bool success = _task_queue->push(obj);
3007 tmp_guarantee_CM( success, "invariant" );
3008 }
3009
3010 statsOnly( int tmp_size = _task_queue->size();
3011 if (tmp_size > _local_max_size)
3012 _local_max_size = tmp_size;
3013 ++_local_pushes );
3014 }
3015
3016 void CMTask::reached_limit() {
3017 tmp_guarantee_CM( _words_scanned >= _words_scanned_limit ||
3018 _refs_reached >= _refs_reached_limit ,
3019 "shouldn't have been called otherwise" );
3020 regular_clock_call();
3021 }
3022
3023 void CMTask::regular_clock_call() {
3024 if (has_aborted())
3025 return;
3026
3027 // First, we need to recalculate the words scanned and refs reached
3028 // limits for the next clock call.
3029 recalculate_limits();
3030
3031 // During the regular clock call we do the following
3032
3033 // (1) If an overflow has been flagged, then we abort.
3034 if (_cm->has_overflown()) {
3035 set_has_aborted();
3036 return;
3037 }
3038
3039 // If we are not concurrent (i.e. we're doing remark) we don't need
3040 // to check anything else. The other steps are only needed during
3041 // the concurrent marking phase.
3042 if (!concurrent())
3043 return;
3044
3045 // (2) If marking has been aborted for Full GC, then we also abort.
3046 if (_cm->has_aborted()) {
3047 set_has_aborted();
3048 statsOnly( ++_aborted_cm_aborted );
3049 return;
3050 }
3051
3052 double curr_time_ms = os::elapsedVTime() * 1000.0;
3053
3054 // (3) If marking stats are enabled, then we update the step history.
3055 #if _MARKING_STATS_
3056 if (_words_scanned >= _words_scanned_limit)
3057 ++_clock_due_to_scanning;
3058 if (_refs_reached >= _refs_reached_limit)
3059 ++_clock_due_to_marking;
3060
3061 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3062 _interval_start_time_ms = curr_time_ms;
3063 _all_clock_intervals_ms.add(last_interval_ms);
3064
3065 if (_cm->verbose_medium()) {
3066 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3067 "scanned = %d%s, refs reached = %d%s",
3068 _task_id, last_interval_ms,
3069 _words_scanned,
3070 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3071 _refs_reached,
3072 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3073 }
3074 #endif // _MARKING_STATS_
3075
3076 // (4) We check whether we should yield. If we have to, then we abort.
3077 if (_cm->should_yield()) {
3078 // We should yield. To do this we abort the task. The caller is
3079 // responsible for yielding.
3080 set_has_aborted();
3081 statsOnly( ++_aborted_yield );
3082 return;
3083 }
3084
3085 // (5) We check whether we've reached our time quota. If we have,
3086 // then we abort.
3087 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3088 if (elapsed_time_ms > _time_target_ms) {
3089 set_has_aborted();
3090 _has_aborted_timed_out = true;
3091 statsOnly( ++_aborted_timed_out );
3092 return;
3093 }
3094
3095 // (6) Finally, we check whether there are enough completed SATB
3096 // buffers available for processing. If there are, we abort.
3097 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3098 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3099 if (_cm->verbose_low())
3100 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3101 _task_id);
3102 // we do need to process SATB buffers, we'll abort and restart
3103 // the marking task to do so
3104 set_has_aborted();
3105 statsOnly( ++_aborted_satb );
3106 return;
3107 }
3108 }
3109
3110 void CMTask::recalculate_limits() {
3111 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3112 _words_scanned_limit = _real_words_scanned_limit;
3113
3114 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3115 _refs_reached_limit = _real_refs_reached_limit;
3116 }
3117
3118 void CMTask::decrease_limits() {
3119 // This is called when we believe that we're going to do an infrequent
3120 // operation which will increase the per byte scanned cost (i.e. move
3121 // entries to/from the global stack). It basically tries to decrease the
3122 // scanning limit so that the clock is called earlier.
3123
3124 if (_cm->verbose_medium())
3125 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3126
3127 _words_scanned_limit = _real_words_scanned_limit -
3128 3 * words_scanned_period / 4;
3129 _refs_reached_limit = _real_refs_reached_limit -
3130 3 * refs_reached_period / 4;
3131 }
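A numeric illustration of the effect of decrease_limits(); the period value below is hypothetical, not the real constant.

//   Suppose words_scanned_period == 12288 words.
//   recalculate_limits(): _words_scanned_limit = _words_scanned + 12288
//   decrease_limits():    _words_scanned_limit = _real_words_scanned_limit
//                                                  - 3 * 12288 / 4
//                                              = (words scanned at the last
//                                                 recalculate_limits()) + 3072
//   i.e. after an expensive operation the task only scans roughly a quarter of
//   the usual period before regular_clock_call() runs again.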
3132
3133 void CMTask::move_entries_to_global_stack() {
3134 // local array where we'll store the entries that will be popped
3135 // from the local queue
3136 oop buffer[global_stack_transfer_size];
3137
3138 int n = 0;
3139 oop obj;
3140 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3141 buffer[n] = obj;
3142 ++n;
3143 }
3144
3145 if (n > 0) {
3146 // we popped at least one entry from the local queue
3147
3148 statsOnly( ++_global_transfers_to; _local_pops += n );
3149
3150 if (!_cm->mark_stack_push(buffer, n)) {
3151 if (_cm->verbose_low())
3152 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3153 set_has_aborted();
3154 } else {
3155 // the transfer was successful
3156
3157 if (_cm->verbose_medium())
3158 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3159 _task_id, n);
3160 statsOnly( int tmp_size = _cm->mark_stack_size();
3161 if (tmp_size > _global_max_size)
3162 _global_max_size = tmp_size;
3163 _global_pushes += n );
3164 }
3165 }
3166
3167 // this operation was quite expensive, so decrease the limits
3168 decrease_limits();
3169 }
3170
3171 void CMTask::get_entries_from_global_stack() {
3172 // local array where we'll store the entries that will be popped
3173 // from the global stack.
3174 oop buffer[global_stack_transfer_size];
3175 int n;
3176 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3177 tmp_guarantee_CM( n <= global_stack_transfer_size,
3178 "we should not pop more than the given limit" );
3179 if (n > 0) {
3180 // yes, we did actually pop at least one entry
3181
3182 statsOnly( ++_global_transfers_from; _global_pops += n );
3183 if (_cm->verbose_medium())
3184 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3185 _task_id, n);
3186 for (int i = 0; i < n; ++i) {
3187 bool success = _task_queue->push(buffer[i]);
3188 // We only call this when the local queue is empty or under a
3189 // given target limit. So, we do not expect this push to fail.
3190 tmp_guarantee_CM( success, "invariant" );
3191 }
3192
3193 statsOnly( int tmp_size = _task_queue->size();
3194 if (tmp_size > _local_max_size)
3195 _local_max_size = tmp_size;
3196 _local_pushes += n );
3197 }
3198
3199 // this operation was quite expensive, so decrease the limits
3200 decrease_limits();
3201 }
3202
3203 void CMTask::drain_local_queue(bool partially) {
3204 if (has_aborted())
3205 return;
3206
3207 // Decide what the target size is, depending on whether we're going to
3208 // drain it partially (so that other tasks can steal if they run out
3209 // of things to do) or totally (at the very end).
3210 size_t target_size;
3211 if (partially)
3212 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3213 else
3214 target_size = 0;
3215
3216 if (_task_queue->size() > target_size) {
3217 if (_cm->verbose_high())
3218 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3219 _task_id, target_size);
3220
3221 oop obj;
3222 bool ret = _task_queue->pop_local(obj);
3223 while (ret) {
3224 statsOnly( ++_local_pops );
3225
3226 if (_cm->verbose_high())
3227 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3228 (void*) obj);
3229
3230 tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
3231 "invariant" );
3232
3233 scan_object(obj);
3234
3235 if (_task_queue->size() <= target_size || has_aborted())
3236 ret = false;
3237 else
3238 ret = _task_queue->pop_local(obj);
3239 }
3240
3241 if (_cm->verbose_high())
3242 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3243 _task_id, _task_queue->size());
3244 }
3245 }
3246
3247 void CMTask::drain_global_stack(bool partially) {
3248 if (has_aborted())
3249 return;
3250
3251 // We have a policy to drain the local queue before we attempt to
3252 // drain the global stack.
3253 tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" );
3254
3255 // Decide what the target size is, depending on whether we're going to
3256 // drain it partially (so that other tasks can steal if they run out
3257 // of things to do) or totally (at the very end). Notice that,
3258 // because we move entries from the global stack in chunks or
3259 // because another task might be doing the same, we might in fact
3260 // drop below the target. But, this is not a problem.
3261 size_t target_size;
3262 if (partially)
3263 target_size = _cm->partial_mark_stack_size_target();
3264 else
3265 target_size = 0;
3266
3267 if (_cm->mark_stack_size() > target_size) {
3268 if (_cm->verbose_low())
3269 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3270 _task_id, target_size);
3271
3272 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3273 get_entries_from_global_stack();
3274 drain_local_queue(partially);
3275 }
3276
3277 if (_cm->verbose_low())
3278 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3279 _task_id, _cm->mark_stack_size());
3280 }
3281 }
3282
3283 // SATB Queue has several assumptions on whether to call the par or
3284 // non-par versions of the methods. This is why some of the code is
3285 // replicated. We should really get rid of the single-threaded version
3286 // of the code to simplify things.
3287 void CMTask::drain_satb_buffers() {
3288 if (has_aborted())
3289 return;
3290
3291 // We set this so that the regular clock knows that we're in the
3292 // middle of draining buffers and doesn't set the abort flag when it
3293 // notices that SATB buffers are available for draining. It'd be
3294 // very counterproductive if it did that. :-)
3295 _draining_satb_buffers = true;
3296
3297 CMObjectClosure oc(this);
3298 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3299 if (ParallelGCThreads > 0)
3300 satb_mq_set.set_par_closure(_task_id, &oc);
3301 else
3302 satb_mq_set.set_closure(&oc);
3303
3304 // This keeps claiming and applying the closure to completed buffers
3305 // until we run out of buffers or we need to abort.
3306 if (ParallelGCThreads > 0) {
3307 while (!has_aborted() &&
3308 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3309 if (_cm->verbose_medium())
3310 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3311 statsOnly( ++_satb_buffers_processed );
3312 regular_clock_call();
3313 }
3314 } else {
3315 while (!has_aborted() &&
3316 satb_mq_set.apply_closure_to_completed_buffer()) {
3317 if (_cm->verbose_medium())
3318 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3319 statsOnly( ++_satb_buffers_processed );
3320 regular_clock_call();
3321 }
3322 }
3323
3324 if (!concurrent() && !has_aborted()) {
3325 // We should only do this during remark.
3326 if (ParallelGCThreads > 0)
3327 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3328 else
3329 satb_mq_set.iterate_closure_all_threads();
3330 }
3331
3332 _draining_satb_buffers = false;
3333
3334 tmp_guarantee_CM( has_aborted() ||
3335 concurrent() ||
3336 satb_mq_set.completed_buffers_num() == 0, "invariant" );
3337
3338 if (ParallelGCThreads > 0)
3339 satb_mq_set.set_par_closure(_task_id, NULL);
3340 else
3341 satb_mq_set.set_closure(NULL);
3342
3343 // again, this was a potentially expensive operation, decrease the
3344 // limits to get the regular clock call early
3345 decrease_limits();
3346 }
3347
3348 void CMTask::drain_region_stack(BitMapClosure* bc) {
3349 if (has_aborted())
3350 return;
3351
3352 tmp_guarantee_CM( _region_finger == NULL,
3353 "it should be NULL when we're not scanning a region" );
3354
3355 if (!_cm->region_stack_empty()) {
3356 if (_cm->verbose_low())
3357 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3358 _task_id, _cm->region_stack_size());
3359
3360 MemRegion mr = _cm->region_stack_pop();
3361 // it returns MemRegion() if the pop fails
3362 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3363
3364 while (mr.start() != NULL) {
3365 if (_cm->verbose_medium())
3366 gclog_or_tty->print_cr("[%d] we are scanning region "
3367 "["PTR_FORMAT", "PTR_FORMAT")",
3368 _task_id, mr.start(), mr.end());
3369 tmp_guarantee_CM( mr.end() <= _cm->finger(),
3370 "otherwise the region shouldn't be on the stack" );
3371 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3372 if (_nextMarkBitMap->iterate(bc, mr)) {
3373 tmp_guarantee_CM( !has_aborted(),
3374 "cannot abort the task without aborting the bitmap iteration" );
3375
3376 // We finished iterating over the region without aborting.
3377 regular_clock_call();
3378 if (has_aborted())
3379 mr = MemRegion();
3380 else {
3381 mr = _cm->region_stack_pop();
3382 // it returns MemRegion() if the pop fails
3383 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3384 }
3385 } else {
3386 guarantee( has_aborted(), "currently the only way to do so" );
3387
3388 // The only way to abort the bitmap iteration is to return
3389 // false from the do_bit() method. However, inside the
3390 // do_bit() method we move the _region_finger to point to the
3391 // object currently being looked at. So, if we bail out, we
3392 // have definitely set _region_finger to something non-null.
3393 guarantee( _region_finger != NULL, "invariant" );
3394
3395 // The iteration was actually aborted. So now _region_finger
3396 // points to the address of the object we last scanned. If we
3397 // leave it there, when we restart this task, we will rescan
3398 // the object. It is easy to avoid this. We move the finger by
3399 // enough to point to the next possible object header (the
3400 // bitmap knows by how much we need to move it as it knows its
3401 // granularity).
3402 MemRegion newRegion =
3403 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3404
3405 if (!newRegion.is_empty()) {
3406 if (_cm->verbose_low()) {
3407 gclog_or_tty->print_cr("[%d] pushing unscanned region "
3408 "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
3409 _task_id,
3410 newRegion.start(), newRegion.end());
3411 }
3412 // Now push the part of the region we didn't scan on the
3413 // region stack to make sure a task scans it later.
3414 _cm->region_stack_push(newRegion);
3415 }
3416 // break from while
3417 mr = MemRegion();
3418 }
3419 _region_finger = NULL;
3420 }
3421
3422 // We only push regions on the region stack during evacuation
3423 // pauses. So if we come out of the above iteration because the
3424 // region stack is empty, it will remain empty until the next yield
3425 // point. So, the guarantee below is safe.
3426 guarantee( has_aborted() || _cm->region_stack_empty(),
3427 "only way to exit the loop" );
3428
3429 if (_cm->verbose_low())
3430 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3431 _task_id, _cm->region_stack_size());
3432 }
3433 }
3434
3435 void CMTask::print_stats() {
3436 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3437 _task_id, _calls);
3438 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3439 _elapsed_time_ms, _termination_time_ms);
3440 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3441 _step_times_ms.num(), _step_times_ms.avg(),
3442 _step_times_ms.sd());
3443 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3444 _step_times_ms.maximum(), _step_times_ms.sum());
3445
3446 #if _MARKING_STATS_
3447 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3448 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3449 _all_clock_intervals_ms.sd());
3450 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3451 _all_clock_intervals_ms.maximum(),
3452 _all_clock_intervals_ms.sum());
3453 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3454 _clock_due_to_scanning, _clock_due_to_marking);
3455 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3456 _objs_scanned, _objs_found_on_bitmap);
3457 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3458 _local_pushes, _local_pops, _local_max_size);
3459 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3460 _global_pushes, _global_pops, _global_max_size);
3461 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3462 _global_transfers_to,_global_transfers_from);
3463 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3464 _regions_claimed, _region_stack_pops);
3465 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3466 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3467 _steal_attempts, _steals);
3468 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3469 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3470 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3471 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3472 _aborted_timed_out, _aborted_satb, _aborted_termination);
3473 #endif // _MARKING_STATS_
3474 }
3475
3476 /*****************************************************************************
3477
3478 The do_marking_step(time_target_ms) method is the building block
3479 of the parallel marking framework. It can be called in parallel
3480 with other invocations of do_marking_step() on different tasks
3481 (but only one per task, obviously) and concurrently with the
3482 mutator threads, or during remark, hence it eliminates the need
3483 for two versions of the code. When called during remark, it will
3484 pick up from where the task left off during the concurrent marking
3485 phase. Interestingly, tasks are also claimable during evacuation
3486 pauses, since do_marking_step() ensures that it aborts before
3487 it needs to yield.
3488
3489 The data structures that it uses to do marking work are the
3490 following:
3491
3492 (1) Marking Bitmap. If there are gray objects that appear only
3493 on the bitmap (this happens either when dealing with an overflow
3494 or when the initial marking phase has simply marked the roots
3495 and didn't push them on the stack), then tasks claim heap
3496 regions whose bitmap they then scan to find gray objects. A
3497 global finger indicates where the end of the last claimed region
3498 is. A local finger indicates how far into the region a task has
3499 scanned. The two fingers are used to determine how to gray an
3500 object (i.e. whether simply marking it is OK, as it will be
3501 visited by a task in the future, or whether it also needs to be
3502 pushed on a stack).
3503
3504 (2) Local Queue. The task's local queue, which the task can
3505 access reasonably efficiently. Other tasks can steal from
3506 it when they run out of work. Throughout the marking phase, a
3507 task attempts to keep its local queue short but not totally
3508 empty, so that entries are available for stealing by other
3509 tasks. Only when there is no more work will a task totally
3510 drain its local queue.
3511
3512 (3) Global Mark Stack. This handles local queue overflow. During
3513 marking, only sets of entries are moved between it and the local
3514 queues, as access to it requires a mutex, and finer-grained
3515 interaction with it might cause contention. If it
3516 overflows, then the marking phase should restart and iterate
3517 over the bitmap to identify gray objects. Throughout the marking
3518 phase, tasks attempt to keep the global mark stack at a small
3519 length but not totally empty, so that entries are available for
3520 popping by other tasks. Only when there is no more work will
3521 tasks totally drain the global mark stack.
3522
3523 (4) Global Region Stack. Entries on it correspond to areas of
3524 the bitmap that need to be scanned since they contain gray
3525 objects. Pushes on the region stack only happen during
3526 evacuation pauses and typically correspond to areas covered by
3527 GC LABs. If it overflows, then the marking phase should restart
3528 and iterate over the bitmap to identify gray objects. Tasks will
3529 try to totally drain the region stack as soon as possible.
3530
3531 (5) SATB Buffer Queue. This is where completed SATB buffers are
3532 made available. Buffers are regularly removed from this queue
3533 and scanned for roots, so that the queue doesn't get too
3534 long. During remark, all completed buffers are processed, as
3535 well as the filled-in parts of any uncompleted buffers.
3536
3537 The do_marking_step() method tries to abort when the time target
3538 has been reached. There are a few other cases when the
3539 do_marking_step() method also aborts:
3540
3541 (1) When the marking phase has been aborted (after a Full GC).
3542
3543 (2) When a global overflow (either on the global stack or the
3544 region stack) has been triggered. Before the task aborts, it
3545 will actually sync up with the other tasks to ensure that all
3546 the marking data structures (local queues, stacks, fingers etc.)
3547 are re-initialised so that when do_marking_step() completes,
3548 the marking phase can immediately restart.
3549
3550 (3) When enough completed SATB buffers are available. The
3551 do_marking_step() method only tries to drain SATB buffers right
3552 at the beginning. So, if enough buffers are available, the
3553 marking step aborts and the SATB buffers are processed at
3554 the beginning of the next invocation.
3555
3556 (4) To yield. When we have to yield, we abort and do the yield
3557 right at the end of do_marking_step(). This saves us from a lot
3558 of hassle as, by yielding, we might allow a Full GC. If this
3559 happens then objects will be compacted underneath our feet, the
3560 heap might shrink, etc. We avoid checking for all this by just
3561 aborting and doing the yield right at the end.
3562
3563 From the above it follows that the do_marking_step() method should
3564 be called in a loop (or, otherwise, regularly) until it completes.
3565
3566 If a marking step completes without its has_aborted() flag being
3567 true, it means it has completed the current marking phase (and
3568 also all other marking tasks have done so and have all synced up).
3569
3570 A method called regular_clock_call() is invoked "regularly" (in
3571 sub ms intervals) throughout marking. It is this clock method that
3572 checks all the abort conditions which were mentioned above and
3573 decides when the task should abort. A work-based scheme is used to
3574 trigger this clock method: when the number of object words the
3575 marking phase has scanned or the number of references the marking
3576 phase has visited reaches a given limit. Additional invocations of
3577 the clock method have been planted in a few other strategic places
3578 too. The initial reason for the clock method was to avoid calling
3579 vtime too regularly, as it is quite expensive. So, once it was in
3580 place, it was natural to piggy-back all the other conditions on it
3581 too and not constantly check them throughout the code.
3582
3583 *****************************************************************************/
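
// A minimal sketch of the driver loop that the comment above implies.
// This is an illustration only: the_task is a hypothetical name, cm
// stands for the owning ConcurrentMark instance, the 10ms time target
// is an assumed value, and the actual callers (the concurrent marking
// and remark worker tasks) contain additional bookkeeping not shown.
//
//   CMTask* the_task = ...;   // one of the ConcurrentMark tasks
//   do {
//     the_task->do_marking_step(10.0 /* time target in ms */);
//     // Typically the caller yields to any pending safepoint here and
//     // stops retrying if the whole marking phase has been aborted.
//   } while (the_task->has_aborted() && !cm->has_aborted());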
3584
3585 void CMTask::do_marking_step(double time_target_ms) {
3586 guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
3587 guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
3588
3589 guarantee( concurrent() || _cm->region_stack_empty(),
3590 "the region stack should have been cleared before remark" );
3591 guarantee( _region_finger == NULL,
3592 "this should be non-null only when a region is being scanned" );
3593
3594 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3595 guarantee( _task_queues != NULL, "invariant" );
3596 guarantee( _task_queue != NULL, "invariant" );
3597 guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
3598
3599 guarantee( !_claimed,
3600 "only one thread should claim this task at any one time" );
3601
3602 // OK, this doesn't safeguard against all possible scenarios, as it is
3603 // possible for two threads to set the _claimed flag at the same
3604 // time. But it is only for debugging purposes anyway and it will
3605 // catch most problems.
3606 _claimed = true;
3607
3608 _start_time_ms = os::elapsedVTime() * 1000.0;
3609 statsOnly( _interval_start_time_ms = _start_time_ms );
3610
3611 double diff_prediction_ms =
3612 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3613 _time_target_ms = time_target_ms - diff_prediction_ms;
3614
3615 // set up the variables that are used in the work-based scheme to
3616 // call the regular clock method
3617 _words_scanned = 0;
3618 _refs_reached = 0;
3619 recalculate_limits();
3620
3621 // clear all flags
3622 clear_has_aborted();
3623 _has_aborted_timed_out = false;
3624 _draining_satb_buffers = false;
3625
3626 ++_calls;
3627
3628 if (_cm->verbose_low())
3629 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3630 "target = %1.2lfms >>>>>>>>>>",
3631 _task_id, _calls, _time_target_ms);
3632
3633 // Set up the bitmap and oop closures. Anything that uses them is
3634 // eventually called from this method, so it is OK to allocate them
3635 // locally on the stack.
3636 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3637 CMOopClosure oop_closure(_g1h, _cm, this);
3638 set_oop_closure(&oop_closure);
3639
3640 if (_cm->has_overflown()) {
3641 // This can happen if the region stack or the mark stack overflows
3642 // during a GC pause and this task, after a yield point,
3643 // restarts. We have to abort as we need to get into the overflow
3644 // protocol which happens right at the end of this task.
3645 set_has_aborted();
3646 }
3647
3648 // First drain any available SATB buffers. After this, we will not
3649 // look at SATB buffers before the next invocation of this method.
3650 // If enough completed SATB buffers are queued up, the regular clock
3651 // will abort this task so that it restarts.
3652 drain_satb_buffers();
3653 // ...then partially drain the local queue and the global stack
3654 drain_local_queue(true);
3655 drain_global_stack(true);
3656
3657 // Then totally drain the region stack. We will not look at
3658 // it again before the next invocation of this method. Entries on
3659 // the region stack are only added during evacuation pauses, for
3660 // which we have to yield. When we do, we abort the task anyway so
3661 // it will look at the region stack again when it restarts.
3662 bitmap_closure.set_scanning_heap_region(false);
3663 drain_region_stack(&bitmap_closure);
3664 // ...then partially drain the local queue and the global stack
3665 drain_local_queue(true);
3666 drain_global_stack(true);
3667
3668 do {
3669 if (!has_aborted() && _curr_region != NULL) {
3670 // This means that we're already holding on to a region.
3671 tmp_guarantee_CM( _finger != NULL,
3672 "if region is not NULL, then the finger "
3673 "should not be NULL either" );
3674
3675 // We might have restarted this task after an evacuation pause
3676 // which might have evacuated the region we're holding on to
3677 // underneath our feet. Let's read its limit again to make sure
3678 // that we do not iterate over a region of the heap that
3679 // contains garbage (update_region_limit() will also move
3680 // _finger to the start of the region if it is found empty).
3681 update_region_limit();
3682 // We will start from _finger not from the start of the region,
3683 // as we might be restarting this task after aborting half-way
3684 // through scanning this region. In this case, _finger points to
3685 // the address where we last found a marked object. If this is a
3686 // fresh region, _finger points to start().
3687 MemRegion mr = MemRegion(_finger, _region_limit);
3688
3689 if (_cm->verbose_low())
3690 gclog_or_tty->print_cr("[%d] we're scanning part "
3691 "["PTR_FORMAT", "PTR_FORMAT") "
3692 "of region "PTR_FORMAT,
3693 _task_id, _finger, _region_limit, _curr_region);
3694
3695 // Let's iterate over the bitmap of the part of the
3696 // region that is left.
3697 bitmap_closure.set_scanning_heap_region(true);
3698 if (mr.is_empty() ||
3699 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3700 // We successfully completed iterating over the region. Now,
3701 // let's give up the region.
3702 giveup_current_region();
3703 regular_clock_call();
3704 } else {
3705 guarantee( has_aborted(), "currently the only way to do so" );
3706 // The only way to abort the bitmap iteration is to return
3707 // false from the do_bit() method. However, inside the
3708 // do_bit() method we move the _finger to point to the
3709 // object currently being looked at. So, if we bail out, we
3710 // have definitely set _finger to something non-null.
3711 guarantee( _finger != NULL, "invariant" );
3712
3713 // Region iteration was actually aborted. So now _finger
3714 // points to the address of the object we last scanned. If we
3715 // leave it there, when we restart this task, we will rescan
3716 // the object. It is easy to avoid this. We move the finger by
3717 // enough to point to the next possible object header (the
3718 // bitmap knows by how much we need to move it as it knows its
3719 // granularity).
3720 move_finger_to(_nextMarkBitMap->nextWord(_finger));
3721 }
3722 }
3723 // At this point we have either completed iterating over the
3724 // region we were holding on to, or we have aborted.
3725
3726 // We then partially drain the local queue and the global stack.
3727 // (Do we really need this?)
3728 drain_local_queue(true);
3729 drain_global_stack(true);
3730
3731 // Read the note on the claim_region() method on why it might
3732 // return NULL with potentially more regions available for
3733 // claiming and why we have to check out_of_regions() to determine
3734 // whether we're done or not.
3735 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3736 // We are going to try to claim a new region. We should have
3737 // given up on the previous one.
3738 tmp_guarantee_CM( _curr_region == NULL &&
3739 _finger == NULL &&
3740 _region_limit == NULL, "invariant" );
3741 if (_cm->verbose_low())
3742 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3743 HeapRegion* claimed_region = _cm->claim_region(_task_id);
3744 if (claimed_region != NULL) {
3745 // Yes, we managed to claim one
3746 statsOnly( ++_regions_claimed );
3747
3748 if (_cm->verbose_low())
3749 gclog_or_tty->print_cr("[%d] we successfully claimed "
3750 "region "PTR_FORMAT,
3751 _task_id, claimed_region);
3752
3753 setup_for_region(claimed_region);
3754 tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
3755 }
3756 // It is important to call the regular clock here. It might take
3757 // a while to claim a region if, for example, we hit a large
3758 // block of empty regions. So we need to call the regular clock
3759 // method once round the loop to make sure it's called
3760 // frequently enough.
3761 regular_clock_call();
3762 }
3763
3764 if (!has_aborted() && _curr_region == NULL) {
3765 tmp_guarantee_CM( _cm->out_of_regions(),
3766 "at this point we should be out of regions" );
3767 }
3768 } while ( _curr_region != NULL && !has_aborted());
3769
3770 if (!has_aborted()) {
3771 // We cannot check whether the global stack is empty, since other
3772 // tasks might be pushing objects to it concurrently. We also cannot
3773 // check if the region stack is empty because if a thread is aborting
3774 // it can push a partially done region back.
3775 tmp_guarantee_CM( _cm->out_of_regions(),
3776 "at this point we should be out of regions" );
3777
3778 if (_cm->verbose_low())
3779 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3780
3781 // Try to reduce the number of available SATB buffers so that
3782 // remark has less work to do.
3783 drain_satb_buffers();
3784 }
3785
3786 // Since we've done everything else, we can now totally drain the
3787 // local queue and global stack.
3788 drain_local_queue(false);
3789 drain_global_stack(false);
3790
3791 // Attempt at work stealing from other task's queues.
3792 if (!has_aborted()) {
3793 // We have not aborted. This means that we have finished all that
3794 // we could. Let's try to do some stealing...
3795
3796 // We cannot check whether the global stack is empty, since other
3797 // tasks might be pushing objects to it concurrently. We also cannot
3798 // check if the region stack is empty because if a thread is aborting
3799 // it can push a partially done region back.
3800 guarantee( _cm->out_of_regions() &&
3801 _task_queue->size() == 0, "only way to reach here" );
3802
3803 if (_cm->verbose_low())
3804 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3805
3806 while (!has_aborted()) {
3807 oop obj;
3808 statsOnly( ++_steal_attempts );
3809
3810 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3811 if (_cm->verbose_medium())
3812 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3813 _task_id, (void*) obj);
3814
3815 statsOnly( ++_steals );
3816
3817 tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
3818 "any stolen object should be marked" );
3819 scan_object(obj);
3820
3821 // And since we're towards the end, let's totally drain the
3822 // local queue and global stack.
3823 drain_local_queue(false);
3824 drain_global_stack(false);
3825 } else {
3826 break;
3827 }
3828 }
3829 }
3830
3831 // We still haven't aborted. Now, let's try to get into the
3832 // termination protocol.
3833 if (!has_aborted()) {
3834 // We cannot check whether the global stack is empty, since other
3835 // tasks might be concurrently pushing objects on it. We also cannot
3836 // check if the region stack is empty because if a thread is aborting
3837 // it can push a partially done region back.
3838 guarantee( _cm->out_of_regions() &&
3839 _task_queue->size() == 0, "only way to reach here" );
3840
3841 if (_cm->verbose_low())
3842 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3843
3844 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3845 // The CMTask class also extends the TerminatorTerminator class,
3846 // hence its should_exit_termination() method will also decide
3847 // whether to exit the termination protocol or not.
3848 bool finished = _cm->terminator()->offer_termination(this);
3849 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3850 _termination_time_ms +=
3851 termination_end_time_ms - _termination_start_time_ms;
3852
3853 if (finished) {
3854 // We're all done.
3855
3856 if (_task_id == 0) {
3857 // let's allow task 0 to do this
3858 if (concurrent()) {
3859 guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
3860 // we need to set this to false before the next
3861 // safepoint. This way we ensure that the marking phase
3862 // doesn't observe any more heap expansions.
3863 _cm->clear_concurrent_marking_in_progress();
3864 }
3865 }
3866
3867 // We can now guarantee that the global stack is empty, since
3868 // all other tasks have finished.
3869 guarantee( _cm->out_of_regions() &&
3870 _cm->region_stack_empty() &&
3871 _cm->mark_stack_empty() &&
3872 _task_queue->size() == 0 &&
3873 !_cm->has_overflown() &&
3874 !_cm->mark_stack_overflow() &&
3875 !_cm->region_stack_overflow(),
3876 "only way to reach here" );
3877
3878 if (_cm->verbose_low())
3879 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
3880 } else {
3881 // Apparently there's more work to do. Let's abort this task. The
3882 // caller will restart it and we can hopefully find more things to do.
3883
3884 if (_cm->verbose_low())
3885 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
3886
3887 set_has_aborted();
3888 statsOnly( ++_aborted_termination );
3889 }
3890 }
3891
3892 // Mainly for debugging purposes to make sure that a pointer to the
3893 // closure which was stack-allocated in this frame doesn't
3894 // escape it by accident.
3895 set_oop_closure(NULL);
3896 double end_time_ms = os::elapsedVTime() * 1000.0;
3897 double elapsed_time_ms = end_time_ms - _start_time_ms;
3898 // Update the step history.
3899 _step_times_ms.add(elapsed_time_ms);
3900
3901 if (has_aborted()) {
3902 // The task was aborted for some reason.
3903
3904 statsOnly( ++_aborted );
3905
3906 if (_has_aborted_timed_out) {
3907 double diff_ms = elapsed_time_ms - _time_target_ms;
3908 // Keep statistics of how well we did with respect to hitting
3909 // our target only if we actually timed out (if we aborted for
3910 // other reasons, then the results might get skewed).
3911 _marking_step_diffs_ms.add(diff_ms);
3912 }
3913
3914 if (_cm->has_overflown()) {
3915 // This is the interesting one. We aborted because a global
3916 // overflow was raised. This means we have to restart the
3917 // marking phase and start iterating over regions. However, in
3918 // order to do this we have to make sure that all tasks stop
3919 // what they are doing and re-initialise in a safe manner. We
3920 // will achieve this with the use of two barrier sync points.
3921
3922 if (_cm->verbose_low())
3923 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
3924
3925 _cm->enter_first_sync_barrier(_task_id);
3926 // When we exit this sync barrier we know that all tasks have
3927 // stopped doing marking work. So, it's now safe to
3928 // re-initialise our data structures. At the end of this method,
3929 // task 0 will clear the global data structures.
3930
3931 statsOnly( ++_aborted_overflow );
3932
3933 // We clear the local state of this task...
3934 clear_region_fields();
3935
3936 // ...and enter the second barrier.
3937 _cm->enter_second_sync_barrier(_task_id);
3938 // At this point everything has been re-initialised and we're
3939 // ready to restart.
3940 }
3941
3942 if (_cm->verbose_low()) {
3943 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
3944 "elapsed = %1.2lfms <<<<<<<<<<",
3945 _task_id, _time_target_ms, elapsed_time_ms);
3946 if (_cm->has_aborted())
3947 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
3948 _task_id);
3949 }
3950 } else {
3951 if (_cm->verbose_low())
3952 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
3953 "elapsed = %1.2lfms <<<<<<<<<<",
3954 _task_id, _time_target_ms, elapsed_time_ms);
3955 }
3956
3957 _claimed = false;
3958 }
3959
3960 CMTask::CMTask(int task_id,
3961 ConcurrentMark* cm,
3962 CMTaskQueue* task_queue,
3963 CMTaskQueueSet* task_queues)
3964 : _g1h(G1CollectedHeap::heap()),
3965 _co_tracker(G1CMGroup),
3966 _task_id(task_id), _cm(cm),
3967 _claimed(false),
3968 _nextMarkBitMap(NULL), _hash_seed(17),
3969 _task_queue(task_queue),
3970 _task_queues(task_queues),
3971 _oop_closure(NULL) {
3972 guarantee( task_queue != NULL, "invariant" );
3973 guarantee( task_queues != NULL, "invariant" );
3974
3975 statsOnly( _clock_due_to_scanning = 0;
3976 _clock_due_to_marking = 0 );
3977
3978 _marking_step_diffs_ms.add(0.5);
3979 }