comparison src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp @ 2369:92da084fefc9

6668573: CMS: reference processing crash if ParallelCMSThreads > ParallelGCThreads
Summary: Use _max_num_q = max(discovery_degree, processing_degree), and let balance_queues() redistribute from discovery_degree to processing_degree of queues. This should also allow a more dynamic and flexible parallelism policy in the future.
Reviewed-by: jmasa, johnc
author ysr
date Thu, 17 Mar 2011 10:32:46 -0700
parents a181f3a124dd
children b099aaf51bf8
comparing revision 2368:dde920245681 with revision 2369:92da084fefc9
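The Summary above is terse, so here is a minimal standalone sketch of the sizing and balancing idea it describes. This is plain C++, not HotSpot code; DiscoveredLists and everything inside it are hypothetical names used only for illustration. The queue array is sized by max(discovery_degree, processing_degree) so that a discovering worker whose id is at least the processing degree can no longer index past the end of the array (the crash in the bug title), and a balance step then redistributes the discovered entries onto the processing_degree queues that will actually be drained.

// Illustrative sketch only, assuming the policy stated in the Summary.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

struct DiscoveredLists {
  size_t _processing_degree;                 // workers that will process later
  size_t _max_num_q;                         // queues actually allocated
  std::vector<std::vector<void*> > _queues;

  DiscoveredLists(size_t discovery_degree, size_t processing_degree)
    : _processing_degree(processing_degree),
      // Size by the larger of the two degrees, per the Summary.
      _max_num_q(std::max(discovery_degree, processing_degree)),
      _queues(_max_num_q) {}

  // Discovering workers are numbered 0 .. discovery_degree - 1, so with the
  // max() sizing above the index is always in bounds.
  void discover(size_t worker_id, void* ref) {
    assert(worker_id < _max_num_q);
    _queues[worker_id].push_back(ref);
  }

  // Analogue of balance_queues(): spread whatever discovery produced across
  // the first _processing_degree queues before processing starts.
  void balance_queues() {
    std::vector<void*> all;
    for (size_t i = 0; i < _max_num_q; ++i) {
      all.insert(all.end(), _queues[i].begin(), _queues[i].end());
      _queues[i].clear();
    }
    for (size_t i = 0; i < all.size(); ++i) {
      _queues[i % _processing_degree].push_back(all[i]);
    }
  }
};

The changeset below wires the same idea into the ReferenceProcessor constructor call and into the places that toggle MT discovery and MT processing.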
@@ -290,17 +290,19 @@
 }
 
 void CMSCollector::ref_processor_init() {
   if (_ref_processor == NULL) {
     // Allocate and initialize a reference processor
-    _ref_processor = ReferenceProcessor::create_ref_processor(
-                                    _span,                               // span
-                                    _cmsGen->refs_discovery_is_atomic(), // atomic_discovery
-                                    _cmsGen->refs_discovery_is_mt(),     // mt_discovery
-                                    &_is_alive_closure,
-                                    ParallelGCThreads,
-                                    ParallelRefProcEnabled);
+    _ref_processor =
+      new ReferenceProcessor(_span,                               // span
+                             (ParallelGCThreads > 1) && ParallelRefProcEnabled, // mt processing
+                             (int) ParallelGCThreads,             // mt processing degree
+                             _cmsGen->refs_discovery_is_mt(),     // mt discovery
+                             (int) MAX2(ConcGCThreads, ParallelGCThreads), // mt discovery degree
+                             _cmsGen->refs_discovery_is_atomic(), // discovery is not atomic
+                             &_is_alive_closure,                  // closure for liveness info
+                             false);                              // next field updates do not need write barrier
     // Initialize the _ref_processor field of CMSGen
     _cmsGen->set_ref_processor(_ref_processor);
 
     // Allocate a dummy ref processor for perm gen.
     ReferenceProcessor* rp2 = new ReferenceProcessor();
@@ -639,11 +641,11 @@
     warning("Failed to allocate CMS Revisit Stack");
     return;
   }
 
   // Support for multi-threaded concurrent phases
-  if (CollectedHeap::use_parallel_gc_threads() && CMSConcurrentMTEnabled) {
+  if (CMSConcurrentMTEnabled) {
     if (FLAG_IS_DEFAULT(ConcGCThreads)) {
       // just for now
       FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4);
     }
     if (ConcGCThreads > 1) {
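A quick worked example of the ConcGCThreads default chosen in the hunk above: (ParallelGCThreads + 3)/4 is a ceiling division by four, so ParallelGCThreads = 13 gives (13 + 3)/4 = 4 concurrent marking threads, while any value from 1 through 4 gives 1.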
@@ -1988,21 +1990,20 @@
   }
 
   // Temporarily widen the span of the weak reference processing to
   // the entire heap.
   MemRegion new_span(GenCollectedHeap::heap()->reserved_region());
-  ReferenceProcessorSpanMutator x(ref_processor(), new_span);
-
+  ReferenceProcessorSpanMutator rp_mut_span(ref_processor(), new_span);
   // Temporarily, clear the "is_alive_non_header" field of the
   // reference processor.
-  ReferenceProcessorIsAliveMutator y(ref_processor(), NULL);
-
+  ReferenceProcessorIsAliveMutator rp_mut_closure(ref_processor(), NULL);
   // Temporarily make reference _processing_ single threaded (non-MT).
-  ReferenceProcessorMTProcMutator z(ref_processor(), false);
-
+  ReferenceProcessorMTProcMutator rp_mut_mt_processing(ref_processor(), false);
   // Temporarily make refs discovery atomic
-  ReferenceProcessorAtomicMutator w(ref_processor(), true);
+  ReferenceProcessorAtomicMutator rp_mut_atomic(ref_processor(), true);
+  // Temporarily make reference _discovery_ single threaded (non-MT)
+  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(ref_processor(), false);
 
   ref_processor()->set_enqueuing_is_done(false);
   ref_processor()->enable_discovery();
   ref_processor()->setup_policy(clear_all_soft_refs);
   // If an asynchronous collection finishes, the _modUnionTable is
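The rp_mut_* locals introduced above are scoped mutators: each one overrides a single ReferenceProcessor setting for the duration of the enclosing block and restores the previous value when it goes out of scope, so the collector cannot leave the processor in a temporary configuration on an early exit. A generic sketch of that save/restore idiom (ScopedFlagMutator is an illustrative name, not a HotSpot class):

// Illustrative only: records the old value in the constructor, installs the
// new one, and restores the old value in the destructor at scope exit.
class ScopedFlagMutator {
  bool* _flag;   // the setting being temporarily overridden
  bool  _saved;  // its value on entry
 public:
  ScopedFlagMutator(bool* flag, bool new_value)
    : _flag(flag), _saved(*flag) {
    *_flag = new_value;
  }
  ~ScopedFlagMutator() {
    *_flag = _saved;  // restored even on early return or exception
  }
};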
@@ -4263,13 +4264,11 @@
   cms_space ->initialize_sequential_subtasks_for_marking(num_workers);
   perm_space->initialize_sequential_subtasks_for_marking(num_workers);
 
   // Refs discovery is already non-atomic.
   assert(!ref_processor()->discovery_is_atomic(), "Should be non-atomic");
-  // Mutate the Refs discovery so it is MT during the
-  // multi-threaded marking phase.
-  ReferenceProcessorMTMutator mt(ref_processor(), num_workers > 1);
+  assert(ref_processor()->discovery_is_mt(), "Discovery should be MT");
   DEBUG_ONLY(RememberKlassesChecker cmx(should_unload_classes());)
   conc_workers()->start_task(&tsk);
   while (tsk.yielded()) {
     tsk.coordinator_yield();
     conc_workers()->continue_task(&tsk);
@@ -4318,10 +4317,12 @@
 
 bool CMSCollector::do_marking_st(bool asynch) {
   ResourceMark rm;
   HandleMark hm;
 
+  // Temporarily make refs discovery single threaded (non-MT)
+  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(ref_processor(), false);
   MarkFromRootsClosure markFromRootsClosure(this, _span, &_markBitMap,
     &_markStack, &_revisitStack, CMSYield && asynch);
   // the last argument to iterate indicates whether the iteration
   // should be incremental with periodic yields.
   _markBitMap.iterate(&markFromRootsClosure);
@@ -4356,14 +4357,10 @@
   assert(Thread::current()->is_ConcurrentGC_thread(), "Wrong thread");
   verify_work_stacks_empty();
   verify_overflow_empty();
   _abort_preclean = false;
   if (CMSPrecleaningEnabled) {
-    // Precleaning is currently not MT but the reference processor
-    // may be set for MT.  Disable it temporarily here.
-    ReferenceProcessor* rp = ref_processor();
-    ReferenceProcessorMTProcMutator z(rp, false);
     _eden_chunk_index = 0;
     size_t used = get_eden_used();
     size_t capacity = get_eden_capacity();
     // Don't start sampling unless we will get sufficiently
     // many samples.
@@ -4502,15 +4499,20 @@
 size_t CMSCollector::preclean_work(bool clean_refs, bool clean_survivor) {
   assert(_collectorState == Precleaning ||
          _collectorState == AbortablePreclean, "incorrect state");
   ResourceMark rm;
   HandleMark hm;
+
+  // Precleaning is currently not MT but the reference processor
+  // may be set for MT.  Disable it temporarily here.
+  ReferenceProcessor* rp = ref_processor();
+  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
+
   // Do one pass of scrubbing the discovered reference lists
   // to remove any reference objects with strongly-reachable
   // referents.
   if (clean_refs) {
-    ReferenceProcessor* rp = ref_processor();
     CMSPrecleanRefsYieldClosure yield_cl(this);
     assert(rp->span().equals(_span), "Spans should be equal");
     CMSKeepAliveClosure keep_alive(this, _span, &_markBitMap,
                                    &_markStack, &_revisitStack,
                                    true /* preclean */);
@@ -5576,12 +5578,14 @@
   // It turns out that even when we're using 1 thread, doing the work in a
   // separate thread causes wide variance in run times.  We can't help this
   // in the multi-threaded case, but we special-case n=1 here to get
   // repeatable measurements of the 1-thread overhead of the parallel code.
   if (n_workers > 1) {
-    // Make refs discovery MT-safe
-    ReferenceProcessorMTMutator mt(ref_processor(), true);
+    // Make refs discovery MT-safe, if it isn't already: it may not
+    // necessarily be so, since it's possible that we are doing
+    // ST marking.
+    ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), true);
     GenCollectedHeap::StrongRootsScope srs(gch);
     workers->run_task(&tsk);
   } else {
     GenCollectedHeap::StrongRootsScope srs(gch);
     tsk.work(0);
@@ -5703,18 +5707,23 @@
     CMSCollector* collector,
     const MemRegion& span,
     CMSBitMap* mark_bit_map,
     AbstractWorkGang* workers,
     OopTaskQueueSet* task_queues):
+    // XXX Should superclass AGTWOQ also know about AWG since it knows
+    // about the task_queues used by the AWG? Then it could initialize
+    // the terminator() object.  See 6984287. The set_for_termination()
+    // below is a temporary band-aid for the regression in 6984287.
     AbstractGangTaskWOopQueues("Process referents by policy in parallel",
       task_queues),
     _task(task),
     _collector(collector), _span(span), _mark_bit_map(mark_bit_map)
   {
     assert(_collector->_span.equals(_span) && !_span.is_empty(),
            "Inconsistency in _span");
-  }
+    set_for_termination(workers->active_workers());
+  }
 
   OopTaskQueueSet* task_queues() { return queues(); }
 
   OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
 
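The set_for_termination(workers->active_workers()) call added above feeds the number of active workers into the task's termination protocol, which the XXX comment flags as a stop-gap for 6984287. The count matters because a work-stealing phase can only be declared finished once every active worker has reported that it found no more work. A sketch of such a protocol (illustrative only; this is not HotSpot's ParallelTaskTerminator):

// Illustrative termination protocol: the phase is over only when all
// n_workers have offered termination, so the count must match the gang.
#include <atomic>

class SimpleTerminator {
  const unsigned        _n_workers;
  std::atomic<unsigned> _idle_count;
 public:
  explicit SimpleTerminator(unsigned n_workers)
    : _n_workers(n_workers), _idle_count(0) {}

  // Called by a worker whose local queue is empty and whose steal attempts
  // have failed; returns true once every worker is idle.
  bool offer_termination() {
    return _idle_count.fetch_add(1) + 1 == _n_workers;
  }

  // If the worker later finds more work, it withdraws its offer.
  void retract() {
    _idle_count.fetch_sub(1);
  }
};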
@@ -5872,12 +5881,11 @@
     // may have been a different number of threads doing the discovery
     // and a different number of discovered lists may have Ref objects.
     // That is OK as long as the Reference lists are balanced (see
     // balance_all_queues() and balance_queues()).
 
-
-    rp->set_mt_degree(ParallelGCThreads);
+    rp->set_active_mt_degree(ParallelGCThreads);
     CMSRefProcTaskExecutor task_executor(*this);
     rp->process_discovered_references(&_is_alive_closure,
                                       &cmsKeepAliveClosure,
                                       &cmsDrainMarkingStackClosure,
                                       &task_executor);
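To make the comment at the top of this hunk concrete: per the Summary, discovery and processing may run at different degrees. If, say, the discovery degree is 4 while ParallelGCThreads is 2 (the ParallelCMSThreads > ParallelGCThreads shape from the bug title), discovered Refs can sit on any of four lists, yet only two queues are processed here; balance_queues() redistributes the contents of the four lists over those two queues so each processing worker ends up with a comparable share, which is why the imbalance noted in the comment is harmless.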