comparison src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp @ 2369:92da084fefc9
6668573: CMS: reference processing crash if ParallelCMSThreads > ParallelGCThreads
Summary: Use _max_num_q = max(discovery_degree, processing_degree), and let balance_queues() redistribute from discovery_degree to processing_degree of queues. This should also allow a more dynamic and flexible parallelism policy in the future.
Reviewed-by: jmasa, johnc
author      ysr
date        Thu, 17 Mar 2011 10:32:46 -0700
parents     a181f3a124dd
children    b099aaf51bf8
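
The scheme described in the summary can be sketched in isolation: size the discovered-reference queue array by the larger of the discovery and processing degrees, and rebalance the queues before processing so the work lands on the queues that the processing threads will actually scan. The sketch below only illustrates that idea; it is not the ReferenceProcessor code changed in this changeset, and the class and member names are invented for the example.

  #include <algorithm>
  #include <cstddef>
  #include <deque>
  #include <vector>

  // Hypothetical stand-in for a discovered-reference list; not a HotSpot type.
  typedef std::deque<void*> RefList;

  class RefQueues {
   public:
    RefQueues(size_t discovery_degree, size_t processing_degree)
      : _discovery_degree(discovery_degree),
        _processing_degree(processing_degree),
        // _max_num_q = max(discovery_degree, processing_degree), as in the
        // summary, so discovery can never index past the end of the array.
        _max_num_q(std::max(discovery_degree, processing_degree)),
        _queues(_max_num_q) {}

    // Each discovery worker (0 <= worker_id < discovery_degree) appends to
    // its own queue.
    void discover(size_t worker_id, void* ref) {
      _queues[worker_id].push_back(ref);
    }

    // Before processing, redistribute everything that was discovered onto the
    // first processing_degree queues, round-robin, so each processing worker
    // finds a roughly equal share of work.
    void balance_queues() {
      RefList all;
      for (RefList& q : _queues) {
        all.insert(all.end(), q.begin(), q.end());
        q.clear();
      }
      for (size_t i = 0; !all.empty(); ++i) {
        _queues[i % _processing_degree].push_back(all.front());
        all.pop_front();
      }
    }

   private:
    size_t _discovery_degree;
    size_t _processing_degree;
    size_t _max_num_q;
    std::vector<RefList> _queues;
  };

Sizing by the maximum means a discovery worker can never write past the end of the queue array even when the discovery degree (driven by ConcGCThreads) exceeds the processing degree (driven by ParallelGCThreads), which is the configuration named in the bug title.
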
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp  2368:dde920245681
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp  2369:92da084fefc9
@@ -290,17 +290,19 @@
 }
 
 void CMSCollector::ref_processor_init() {
   if (_ref_processor == NULL) {
     // Allocate and initialize a reference processor
-    _ref_processor = ReferenceProcessor::create_ref_processor(
-      _span,                               // span
-      _cmsGen->refs_discovery_is_atomic(), // atomic_discovery
-      _cmsGen->refs_discovery_is_mt(),     // mt_discovery
-      &_is_alive_closure,
-      ParallelGCThreads,
-      ParallelRefProcEnabled);
+    _ref_processor =
+      new ReferenceProcessor(_span,                               // span
+                             (ParallelGCThreads > 1) && ParallelRefProcEnabled, // mt processing
+                             (int) ParallelGCThreads,             // mt processing degree
+                             _cmsGen->refs_discovery_is_mt(),     // mt discovery
+                             (int) MAX2(ConcGCThreads, ParallelGCThreads), // mt discovery degree
+                             _cmsGen->refs_discovery_is_atomic(), // discovery is not atomic
+                             &_is_alive_closure,                  // closure for liveness info
+                             false);                              // next field updates do not need write barrier
     // Initialize the _ref_processor field of CMSGen
     _cmsGen->set_ref_processor(_ref_processor);
 
     // Allocate a dummy ref processor for perm gen.
     ReferenceProcessor* rp2 = new ReferenceProcessor();
@@ -639,11 +641,11 @@
       warning("Failed to allocate CMS Revisit Stack");
       return;
     }
 
     // Support for multi-threaded concurrent phases
-    if (CollectedHeap::use_parallel_gc_threads() && CMSConcurrentMTEnabled) {
+    if (CMSConcurrentMTEnabled) {
       if (FLAG_IS_DEFAULT(ConcGCThreads)) {
         // just for now
         FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4);
       }
       if (ConcGCThreads > 1) {
@@ -1988,21 +1990,20 @@
   }
 
   // Temporarily widen the span of the weak reference processing to
   // the entire heap.
   MemRegion new_span(GenCollectedHeap::heap()->reserved_region());
-  ReferenceProcessorSpanMutator x(ref_processor(), new_span);
-
+  ReferenceProcessorSpanMutator rp_mut_span(ref_processor(), new_span);
   // Temporarily, clear the "is_alive_non_header" field of the
   // reference processor.
-  ReferenceProcessorIsAliveMutator y(ref_processor(), NULL);
-
+  ReferenceProcessorIsAliveMutator rp_mut_closure(ref_processor(), NULL);
   // Temporarily make reference _processing_ single threaded (non-MT).
-  ReferenceProcessorMTProcMutator z(ref_processor(), false);
-
+  ReferenceProcessorMTProcMutator rp_mut_mt_processing(ref_processor(), false);
   // Temporarily make refs discovery atomic
-  ReferenceProcessorAtomicMutator w(ref_processor(), true);
+  ReferenceProcessorAtomicMutator rp_mut_atomic(ref_processor(), true);
+  // Temporarily make reference _discovery_ single threaded (non-MT)
+  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(ref_processor(), false);
 
   ref_processor()->set_enqueuing_is_done(false);
   ref_processor()->enable_discovery();
   ref_processor()->setup_policy(clear_all_soft_refs);
   // If an asynchronous collection finishes, the _modUnionTable is
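
The rp_mut_* objects introduced in the hunk above are scoped mutators: each constructor saves the current value of one ReferenceProcessor setting and installs a temporary one, and the destructor restores the saved value when the block exits. A minimal sketch of that save/restore idiom, using an invented flag holder rather than the actual HotSpot mutator classes:

  // Illustration only: generic scoped override of a boolean setting,
  // mirroring the constructor-set / destructor-restore behavior relied on above.
  class ScopedBoolMutator {
   public:
    ScopedBoolMutator(bool* flag, bool new_value)
      : _flag(flag), _saved(*flag) {
      *_flag = new_value;   // takes effect for the rest of the enclosing scope
    }
    ~ScopedBoolMutator() {
      *_flag = _saved;      // restored automatically, even on early return
    }
   private:
    bool* _flag;
    bool  _saved;
  };

Because restoration happens in the destructor, the temporary settings (the widened span, the cleared is_alive_non_header closure, single-threaded processing and discovery, atomic discovery) cannot leak past the enclosing collection.
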
@@ -4263,13 +4264,11 @@
   cms_space ->initialize_sequential_subtasks_for_marking(num_workers);
   perm_space->initialize_sequential_subtasks_for_marking(num_workers);
 
   // Refs discovery is already non-atomic.
   assert(!ref_processor()->discovery_is_atomic(), "Should be non-atomic");
-  // Mutate the Refs discovery so it is MT during the
-  // multi-threaded marking phase.
-  ReferenceProcessorMTMutator mt(ref_processor(), num_workers > 1);
+  assert(ref_processor()->discovery_is_mt(), "Discovery should be MT");
   DEBUG_ONLY(RememberKlassesChecker cmx(should_unload_classes());)
   conc_workers()->start_task(&tsk);
   while (tsk.yielded()) {
     tsk.coordinator_yield();
     conc_workers()->continue_task(&tsk);
@@ -4318,10 +4317,12 @@
 
 bool CMSCollector::do_marking_st(bool asynch) {
   ResourceMark rm;
   HandleMark hm;
 
+  // Temporarily make refs discovery single threaded (non-MT)
+  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(ref_processor(), false);
   MarkFromRootsClosure markFromRootsClosure(this, _span, &_markBitMap,
     &_markStack, &_revisitStack, CMSYield && asynch);
   // the last argument to iterate indicates whether the iteration
   // should be incremental with periodic yields.
   _markBitMap.iterate(&markFromRootsClosure);
@@ -4356,14 +4357,10 @@
   assert(Thread::current()->is_ConcurrentGC_thread(), "Wrong thread");
   verify_work_stacks_empty();
   verify_overflow_empty();
   _abort_preclean = false;
   if (CMSPrecleaningEnabled) {
-    // Precleaning is currently not MT but the reference processor
-    // may be set for MT. Disable it temporarily here.
-    ReferenceProcessor* rp = ref_processor();
-    ReferenceProcessorMTProcMutator z(rp, false);
     _eden_chunk_index = 0;
     size_t used = get_eden_used();
     size_t capacity = get_eden_capacity();
     // Don't start sampling unless we will get sufficiently
     // many samples.
@@ -4502,15 +4499,20 @@
 size_t CMSCollector::preclean_work(bool clean_refs, bool clean_survivor) {
   assert(_collectorState == Precleaning ||
          _collectorState == AbortablePreclean, "incorrect state");
   ResourceMark rm;
   HandleMark hm;
+
+  // Precleaning is currently not MT but the reference processor
+  // may be set for MT. Disable it temporarily here.
+  ReferenceProcessor* rp = ref_processor();
+  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
+
   // Do one pass of scrubbing the discovered reference lists
   // to remove any reference objects with strongly-reachable
   // referents.
   if (clean_refs) {
-    ReferenceProcessor* rp = ref_processor();
     CMSPrecleanRefsYieldClosure yield_cl(this);
     assert(rp->span().equals(_span), "Spans should be equal");
     CMSKeepAliveClosure keep_alive(this, _span, &_markBitMap,
                                    &_markStack, &_revisitStack,
                                    true /* preclean */);
@@ -5576,12 +5578,14 @@
   // It turns out that even when we're using 1 thread, doing the work in a
   // separate thread causes wide variance in run times. We can't help this
   // in the multi-threaded case, but we special-case n=1 here to get
   // repeatable measurements of the 1-thread overhead of the parallel code.
   if (n_workers > 1) {
-    // Make refs discovery MT-safe
-    ReferenceProcessorMTMutator mt(ref_processor(), true);
+    // Make refs discovery MT-safe, if it isn't already: it may not
+    // necessarily be so, since it's possible that we are doing
+    // ST marking.
+    ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), true);
     GenCollectedHeap::StrongRootsScope srs(gch);
     workers->run_task(&tsk);
   } else {
     GenCollectedHeap::StrongRootsScope srs(gch);
     tsk.work(0);
@@ -5703,18 +5707,23 @@
     CMSCollector* collector,
     const MemRegion& span,
     CMSBitMap* mark_bit_map,
     AbstractWorkGang* workers,
     OopTaskQueueSet* task_queues):
+    // XXX Should superclass AGTWOQ also know about AWG since it knows
+    // about the task_queues used by the AWG? Then it could initialize
+    // the terminator() object. See 6984287. The set_for_termination()
+    // below is a temporary band-aid for the regression in 6984287.
     AbstractGangTaskWOopQueues("Process referents by policy in parallel",
       task_queues),
     _task(task),
     _collector(collector), _span(span), _mark_bit_map(mark_bit_map)
   {
     assert(_collector->_span.equals(_span) && !_span.is_empty(),
            "Inconsistency in _span");
+    set_for_termination(workers->active_workers());
   }
 
   OopTaskQueueSet* task_queues() { return queues(); }
 
   OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
 
@@ -5872,12 +5881,11 @@
   // may have been a different number of threads doing the discovery
   // and a different number of discovered lists may have Ref objects.
   // That is OK as long as the Reference lists are balanced (see
   // balance_all_queues() and balance_queues()).
 
-
-  rp->set_mt_degree(ParallelGCThreads);
+  rp->set_active_mt_degree(ParallelGCThreads);
   CMSRefProcTaskExecutor task_executor(*this);
   rp->process_discovered_references(&_is_alive_closure,
                                     &cmsKeepAliveClosure,
                                     &cmsDrainMarkingStackClosure,
                                     &task_executor);