comparison: src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp @ 1833:8b10f48633dc

6984287: Regularize how GC parallel workers are specified.
Summary: Associate number of GC workers with the workgang as opposed to the task.
Reviewed-by: johnc, ysr

author    jmasa
date      Mon, 20 Sep 2010 14:38:38 -0700
parents   179464550c7d
children  894b1d7c7e01
--- concurrentMarkSweepGeneration.cpp  (1781:97fbf5beff7b)
+++ concurrentMarkSweepGeneration.cpp  (1833:8b10f48633dc)
@@ -193 +193 @@
     assert(UseCompressedOops ||
            junk->prev_addr() == (void*)(oop(junk)->klass_addr()),
            "Offset of FreeChunk::_prev within FreeChunk must match"
            " that of OopDesc::_klass within OopDesc");
   )
-  if (ParallelGCThreads > 0) {
+  if (CollectedHeap::use_parallel_gc_threads()) {
     typedef CMSParGCThreadState* CMSParGCThreadStatePtr;
     _par_gc_thread_states =
       NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads);
     if (_par_gc_thread_states == NULL) {
       vm_exit_during_initialization("Could not allocate par gc structs");
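
Note: the predicate itself is not part of this file. A minimal sketch of
what this diff assumes it to be -- presumably a static member on
CollectedHeap wrapping the old flag test:

    // Sketch only (assumed declaration, e.g. in collectedHeap.hpp):
    // a named predicate replacing the bare "ParallelGCThreads > 0" tests.
    static bool use_parallel_gc_threads() {
      return ParallelGCThreads > 0;
    }
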
@@ -614 +614 @@
     warning("Failed to allocate CMS Revisit Stack");
     return;
   }

   // Support for multi-threaded concurrent phases
-  if (ParallelGCThreads > 0 && CMSConcurrentMTEnabled) {
+  if (CollectedHeap::use_parallel_gc_threads() && CMSConcurrentMTEnabled) {
     if (FLAG_IS_DEFAULT(ConcGCThreads)) {
       // just for now
       FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4);
     }
     if (ConcGCThreads > 1) {
@@ -626 +626 @@
                                  ConcGCThreads, true);
       if (_conc_workers == NULL) {
         warning("GC/CMS: _conc_workers allocation failure: "
                 "forcing -CMSConcurrentMTEnabled");
         CMSConcurrentMTEnabled = false;
+      } else {
+        _conc_workers->initialize_workers();
       }
     } else {
       CMSConcurrentMTEnabled = false;
     }
   }
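
Note: the new else branch defers worker-thread creation to an explicit
initialize_workers() call instead of doing it in the gang constructor. A
hedged sketch of what such an initializer typically does in the workgang
code (allocate_worker() and the os:: calls are illustrative, not quoted
from this changeset):

    // Illustrative sketch of deferred worker creation: the gang only
    // spawns its GangWorker threads when initialize_workers() runs.
    void WorkGang::initialize_workers() {
      _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, total_workers());
      for (int w = 0; w < total_workers(); w++) {
        GangWorker* worker = allocate_worker(w);  // subclass picks the type
        _gang_workers[w] = worker;
        if (worker != NULL && os::create_thread(worker, os::pgc_thread)) {
          os::start_thread(worker);
        }
      }
    }
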
@@ -934 +936 @@
 void ConcurrentMarkSweepGeneration::reset_after_compaction() {
   // Clear the promotion information. These pointers can be adjusted
   // along with all the other pointers into the heap but
   // compaction is expected to be a rare event with
   // a heap using cms so don't do it without seeing the need.
-  if (ParallelGCThreads > 0) {
+  if (CollectedHeap::use_parallel_gc_threads()) {
     for (uint i = 0; i < ParallelGCThreads; i++) {
       _par_gc_thread_states[i]->promo.reset();
     }
   }
 }
@@ -2628 +2630 @@
   bitMapLock()->lock_without_safepoint_check();

   // Should call gc_prologue_work() for all cms gens we are responsible for
   bool registerClosure = _collectorState >= Marking
                          && _collectorState < Sweeping;
-  ModUnionClosure* muc = ParallelGCThreads > 0 ? &_modUnionClosurePar
+  ModUnionClosure* muc = CollectedHeap::use_parallel_gc_threads() ?
+                                                 &_modUnionClosurePar
                                                : &_modUnionClosure;
   _cmsGen->gc_prologue_work(full, registerClosure, muc);
   _permGen->gc_prologue_work(full, registerClosure, muc);

   if (!full) {
@@ -2729 +2732 @@

 void ConcurrentMarkSweepGeneration::gc_epilogue(bool full) {
   collector()->gc_epilogue(full);

   // Also reset promotion tracking in par gc thread states.
-  if (ParallelGCThreads > 0) {
+  if (CollectedHeap::use_parallel_gc_threads()) {
     for (uint i = 0; i < ParallelGCThreads; i++) {
       _par_gc_thread_states[i]->promo.stopTrackingPromotions(i);
     }
   }
 }
@@ -3729 +3732 @@
 };

 // MT Concurrent Marking Task
 class CMSConcMarkingTask: public YieldingFlexibleGangTask {
   CMSCollector* _collector;
-  YieldingFlexibleWorkGang* _workers; // the whole gang
   int           _n_workers;       // requested/desired # workers
   bool          _asynch;
   bool          _result;
   CompactibleFreeListSpace* _cms_space;
   CompactibleFreeListSpace* _perm_space;
@@ -3749 +3751 @@

  public:
   CMSConcMarkingTask(CMSCollector* collector,
                      CompactibleFreeListSpace* cms_space,
                      CompactibleFreeListSpace* perm_space,
-                     bool asynch, int n_workers,
+                     bool asynch,
                      YieldingFlexibleWorkGang* workers,
                      OopTaskQueueSet* task_queues):
     YieldingFlexibleGangTask("Concurrent marking done multi-threaded"),
     _collector(collector),
     _cms_space(cms_space),
     _perm_space(perm_space),
-    _asynch(asynch), _n_workers(n_workers), _result(true),
-    _workers(workers), _task_queues(task_queues),
-    _term(n_workers, task_queues, _collector, asynch),
+    _asynch(asynch), _n_workers(0), _result(true),
+    _task_queues(task_queues),
+    _term(_n_workers, task_queues, _collector, asynch),
     _bit_map_lock(collector->bitMapLock())
   {
-    assert(n_workers <= workers->total_workers(),
-           "Else termination won't work correctly today"); // XXX FIX ME!
-    _requested_size = n_workers;
+    _requested_size = _n_workers;
     _term.set_task(this);
     assert(_cms_space->bottom() < _perm_space->bottom(),
            "Finger incorrectly initialized below");
     _restart_addr = _global_finger = _cms_space->bottom();
   }
@@ -3778 +3778 @@
   OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }

   HeapWord** global_finger_addr() { return &_global_finger; }

   CMSConcMarkingTerminator* terminator() { return &_term; }
+
+  virtual void set_for_termination(int active_workers) {
+    terminator()->reset_for_reuse(active_workers);
+  }

   void work(int i);

   virtual void coordinator_yield();  // stuff done by coordinator
   bool result() { return _result; }
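
Note: set_for_termination() is the hook through which the gang, rather
than the task constructor, now fixes the worker count: when the gang
dispatches the task it reports how many workers will actually run, and
the task resizes its terminator to match. A sketch of the dispatch side
(active_workers() and the surrounding body are assumptions about the
workgang code, not quoted from it):

    // Sketch of the gang side of the handshake: before workers are
    // released onto the task, tell the task how many will participate
    // so its termination protocol is sized correctly.
    void FlexibleWorkGang::run_task(AbstractGangTask* task) {
      task->set_for_termination(active_workers());
      // ... then signal the workers and wait for completion as usual ...
    }
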
@@ -4218 +4222 @@
   int num_workers = ConcGCThreads;

   CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
   CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();

-  CMSConcMarkingTask tsk(this, cms_space, perm_space,
-                         asynch, num_workers /* number requested XXX */,
-                         conc_workers(), task_queues());
+  CMSConcMarkingTask tsk(this,
+                         cms_space,
+                         perm_space,
+                         asynch,
+                         conc_workers(),
+                         task_queues());

   // Since the actual number of workers we get may be different
   // from the number we requested above, do we need to do anything different
   // below? In particular, may be we need to subclass the SequantialSubTasksDone
   // class?? XXX
@@ -4324 +4331 @@
   assert(Thread::current()->is_ConcurrentGC_thread(), "Wrong thread");
   verify_work_stacks_empty();
   verify_overflow_empty();
   _abort_preclean = false;
   if (CMSPrecleaningEnabled) {
+    // Precleaning is currently not MT but the reference processor
+    // may be set for MT.  Disable it temporarily here.
+    ReferenceProcessor* rp = ref_processor();
+    ReferenceProcessorMTProcMutator z(rp, false);
     _eden_chunk_index = 0;
     size_t used = get_eden_used();
     size_t capacity = get_eden_capacity();
     // Don't start sampling unless we will get sufficiently
     // many samples.
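
Note: ReferenceProcessorMTProcMutator acts as a scoped (RAII) switch: MT
reference processing is disabled for the duration of precleaning and
restored when z goes out of scope. A sketch of the shape such a mutator
presumably has (the real class lives with ReferenceProcessor):

    // Assumed save/set/restore idiom: record the current MT setting,
    // install the requested one, restore on destruction.
    class ReferenceProcessorMTProcMutator: StackObj {
      ReferenceProcessor* _rp;
      bool                _saved_mt;
     public:
      ReferenceProcessorMTProcMutator(ReferenceProcessor* rp, bool mt)
          : _rp(rp), _saved_mt(rp->processing_is_mt()) {
        _rp->set_mt_processing(mt);
      }
      ~ReferenceProcessorMTProcMutator() {
        _rp->set_mt_processing(_saved_mt);
      }
    };
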
@@ -4916 +4927 @@
   // A would be collected. Such updates to references in marked objects
   // are detected via the mod union table which is the set of all cards
   // dirtied since the first checkpoint in this GC cycle and prior to
   // the most recent young generation GC, minus those cleaned up by the
   // concurrent precleaning.
-  if (CMSParallelRemarkEnabled && ParallelGCThreads > 0) {
+  if (CMSParallelRemarkEnabled && CollectedHeap::use_parallel_gc_threads()) {
     TraceTime t("Rescan (parallel) ", PrintGCDetails, false, gclog_or_tty);
     do_remark_parallel();
   } else {
     TraceTime t("Rescan (non-parallel) ", PrintGCDetails, false,
                 gclog_or_tty);
@@ -5010 +5021 @@
 }

 // Parallel remark task
 class CMSParRemarkTask: public AbstractGangTask {
   CMSCollector* _collector;
-  WorkGang*     _workers;
   int           _n_workers;
   CompactibleFreeListSpace* _cms_space;
   CompactibleFreeListSpace* _perm_space;

   // The per-thread work queues, available here for stealing.
@@ -5023 +5033 @@

  public:
   CMSParRemarkTask(CMSCollector* collector,
                    CompactibleFreeListSpace* cms_space,
                    CompactibleFreeListSpace* perm_space,
-                   int n_workers, WorkGang* workers,
+                   int n_workers, FlexibleWorkGang* workers,
                    OopTaskQueueSet* task_queues):
     AbstractGangTask("Rescan roots and grey objects in parallel"),
     _collector(collector),
     _cms_space(cms_space), _perm_space(perm_space),
     _n_workers(n_workers),
-    _workers(workers),
     _task_queues(task_queues),
-    _term(workers->total_workers(), task_queues) { }
+    _term(n_workers, task_queues) { }

   OopTaskQueueSet* task_queues() { return _task_queues; }

   OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }

   ParallelTaskTerminator* terminator() { return &_term; }
+  int n_workers() { return _n_workers; }

   void work(int i);

  private:
   // Work method in support of parallel rescan ... of young gen spaces
@@ -5054 +5064 @@
                              Par_MarkRefsIntoAndScanClosure* cl);

   // ... work stealing for the above
   void do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, int* seed);
 };
+
+// work_queue(i) is passed to the closure
+// Par_MarkRefsIntoAndScanClosure.  The "i" parameter
+// also is passed to do_dirty_card_rescan_tasks() and to
+// do_work_steal() to select the i-th task_queue.

 void CMSParRemarkTask::work(int i) {
   elapsedTimer _timer;
   ResourceMark rm;
   HandleMark   hm;
@@ -5126 +5141 @@
   _timer.reset();
   _timer.start();

   // Do the rescan tasks for each of the two spaces
   // (cms_space and perm_space) in turn.
+  // "i" is passed to select the "i-th" task_queue
   do_dirty_card_rescan_tasks(_cms_space, i, &par_mrias_cl);
   do_dirty_card_rescan_tasks(_perm_space, i, &par_mrias_cl);
   _timer.stop();
   if (PrintCMSStatistics != 0) {
     gclog_or_tty->print_cr(
5148 "Finished work stealing in %dth thread: %3.3f sec", 5164 "Finished work stealing in %dth thread: %3.3f sec",
5149 i, _timer.seconds()); 5165 i, _timer.seconds());
5150 } 5166 }
5151 } 5167 }
5152 5168
5169 // Note that parameter "i" is not used.
5153 void 5170 void
5154 CMSParRemarkTask::do_young_space_rescan(int i, 5171 CMSParRemarkTask::do_young_space_rescan(int i,
5155 Par_MarkRefsIntoAndScanClosure* cl, ContiguousSpace* space, 5172 Par_MarkRefsIntoAndScanClosure* cl, ContiguousSpace* space,
5156 HeapWord** chunk_array, size_t chunk_top) { 5173 HeapWord** chunk_array, size_t chunk_top) {
5157 // Until all tasks completed: 5174 // Until all tasks completed:
@@ -5307 +5324 @@
   // Completely finish any left over work from (an) earlier round(s)
   cl->trim_queue(0);
   size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                                        (size_t)ParGCDesiredObjsFromOverflowList);
   // Now check if there's any work in the overflow list
+  // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
+  // only affects the number of attempts made to get work from the
+  // overflow list and does not affect the number of workers.  Just
+  // pass ParallelGCThreads so this behavior is unchanged.
   if (_collector->par_take_from_overflow_list(num_from_overflow_list,
-                                              work_q)) {
+                                              work_q,
+                                              ParallelGCThreads)) {
     // found something in global overflow list;
     // not yet ready to go stealing work from others.
     // We'd like to assert(work_q->size() != 0, ...)
     // because we just took work from the overflow list,
     // but of course we can't since all of that could have
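
Note: this hunk sits inside CMSParRemarkTask::do_work_steal(), which
follows the usual CMS pattern: drain the local queue, refill from the
global overflow list, steal from peers, and only offer termination when
all three come up empty. A condensed sketch of that control flow (the
real body also checks for aborts and yields):

    // Condensed sketch of the work-stealing loop around this hunk.
    while (true) {
      cl->trim_queue(0);                       // drain local work fully
      size_t num = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                        (size_t)ParGCDesiredObjsFromOverflowList);
      if (_collector->par_take_from_overflow_list(num, work_q,
                                                  ParallelGCThreads)) {
        continue;                              // got global overflow work
      }
      oop obj_to_scan;
      if (task_queues()->steal(i, seed, /* reference */ obj_to_scan)) {
        obj_to_scan->oop_iterate(cl);          // scan the stolen object
      } else if (terminator()->offer_termination()) {
        break;                                 // every worker is idle
      }
    }
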
@@ -5365 +5387 @@
 }

 // Merge the per-thread plab arrays into the global survivor chunk
 // array which will provide the partitioning of the survivor space
 // for CMS rescan.
-void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) {
+void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv,
+                                              int no_of_gc_threads) {
   assert(_survivor_plab_array  != NULL, "Error");
   assert(_survivor_chunk_array != NULL, "Error");
   assert(_collectorState == FinalMarking, "Error");
-  for (uint j = 0; j < ParallelGCThreads; j++) {
+  for (int j = 0; j < no_of_gc_threads; j++) {
     _cursor[j] = 0;
   }
   HeapWord* top = surv->top();
   size_t i;
   for (i = 0; i < _survivor_chunk_capacity; i++) {  // all sca entries
     HeapWord* min_val = top;          // Higher than any PLAB address
     uint      min_tid = 0;            // position of min_val this round
-    for (uint j = 0; j < ParallelGCThreads; j++) {
+    for (int j = 0; j < no_of_gc_threads; j++) {
       ChunkArray* cur_sca = &_survivor_plab_array[j];
       if (_cursor[j] == cur_sca->end()) {
         continue;
       }
       assert(_cursor[j] < cur_sca->end(), "ctl pt invariant");
@@ -5411 +5434 @@
     gclog_or_tty->print(" (Survivor:" SIZE_FORMAT "chunks) ", i);
   }
   // Verify that we used up all the recorded entries
   #ifdef ASSERT
   size_t total = 0;
-  for (uint j = 0; j < ParallelGCThreads; j++) {
+  for (int j = 0; j < no_of_gc_threads; j++) {
     assert(_cursor[j] == _survivor_plab_array[j].end(), "Ctl pt invariant");
     total += _cursor[j];
   }
   assert(total == _survivor_chunk_index, "Ctl Pt Invariant");
   // Check that the merged array is in sorted order
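
Note: merge_survivor_plab_arrays() is a k-way merge. Each GC thread
recorded a sorted ChunkArray of PLAB boundaries; a per-thread cursor
walks the arrays, and each round picks the global minimum. The selection
step, schematically (accessor names follow the surrounding code; treat
this as a sketch, not the verbatim body):

    // One selection round of the k-way merge: find the smallest
    // not-yet-consumed address across all per-thread arrays, emit it,
    // and advance that thread's cursor.
    HeapWord* min_val = top;   // sentinel above every PLAB address
    uint      min_tid = 0;
    for (int j = 0; j < no_of_gc_threads; j++) {
      if (_cursor[j] < _survivor_plab_array[j].end()) {
        HeapWord* cur = _survivor_plab_array[j].nth(_cursor[j]);
        if (cur < min_val) { min_val = cur; min_tid = j; }
      }
    }
    _survivor_chunk_array[i] = min_val;
    _cursor[min_tid]++;
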
@@ -5446 +5469 @@
     SequentialSubTasksDone* pst = dng->eden()->par_seq_tasks();
     assert(!pst->valid(), "Clobbering existing data?");
     // Each valid entry in [0, _eden_chunk_index) represents a task.
     size_t n_tasks = _eden_chunk_index + 1;
     assert(n_tasks == 1 || _eden_chunk_array != NULL, "Error");
-    pst->set_par_threads(n_threads);
+    // Sets the condition for completion of the subtask (how many threads
+    // need to finish in order to be done).
+    pst->set_n_threads(n_threads);
     pst->set_n_tasks((int)n_tasks);
   }

   // Merge the survivor plab arrays into _survivor_chunk_array
   if (_survivor_plab_array != NULL) {
-    merge_survivor_plab_arrays(dng->from());
+    merge_survivor_plab_arrays(dng->from(), n_threads);
   } else {
     assert(_survivor_chunk_index == 0, "Error");
   }

   // To space
   {
     SequentialSubTasksDone* pst = dng->to()->par_seq_tasks();
     assert(!pst->valid(), "Clobbering existing data?");
-    pst->set_par_threads(n_threads);
+    // Sets the condition for completion of the subtask (how many threads
+    // need to finish in order to be done).
+    pst->set_n_threads(n_threads);
     pst->set_n_tasks(1);
     assert(pst->valid(), "Error");
   }

   // From space
   {
     SequentialSubTasksDone* pst = dng->from()->par_seq_tasks();
     assert(!pst->valid(), "Clobbering existing data?");
     size_t n_tasks = _survivor_chunk_index + 1;
     assert(n_tasks == 1 || _survivor_chunk_array != NULL, "Error");
-    pst->set_par_threads(n_threads);
+    // Sets the condition for completion of the subtask (how many threads
+    // need to finish in order to be done).
+    pst->set_n_threads(n_threads);
     pst->set_n_tasks((int)n_tasks);
     assert(pst->valid(), "Error");
   }
 }

 // Parallel version of remark
 void CMSCollector::do_remark_parallel() {
   GenCollectedHeap* gch = GenCollectedHeap::heap();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
   assert(workers != NULL, "Need parallel worker threads.");
   int n_workers = workers->total_workers();
   CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
   CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();

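
Note: the rename from set_par_threads() to set_n_threads() matches the
SequentialSubTasksDone contract this change settles on: set_n_threads()
is the completion condition (how many workers must check in before the
subtask set is done), while set_n_tasks() sizes the claimable work. A
sketch of the claiming idiom a worker then uses (as elsewhere in this
file):

    // Workers race to claim task indices; the last of the n_threads
    // workers to call all_tasks_completed() retires the task set.
    SequentialSubTasksDone* pst = space->par_seq_tasks();
    int nth_task = 0;
    while (!pst->is_task_claimed(/* reference */ nth_task)) {
      // ... rescan the chunk of the space that task nth_task covers ...
    }
    pst->all_tasks_completed();
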
@@ -5634 +5663 @@
 }

 ////////////////////////////////////////////////////////
 // Parallel Reference Processing Task Proxy Class
 ////////////////////////////////////////////////////////
-class CMSRefProcTaskProxy: public AbstractGangTask {
+class CMSRefProcTaskProxy: public AbstractGangTaskWOopQueues {
   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
   CMSCollector*          _collector;
   CMSBitMap*             _mark_bit_map;
   const MemRegion        _span;
-  OopTaskQueueSet*       _task_queues;
-  ParallelTaskTerminator _term;
   ProcessTask&           _task;

 public:
   CMSRefProcTaskProxy(ProcessTask&     task,
                       CMSCollector*    collector,
                       const MemRegion& span,
                       CMSBitMap*       mark_bit_map,
-                      int              total_workers,
+                      AbstractWorkGang* workers,
                       OopTaskQueueSet* task_queues):
-    AbstractGangTask("Process referents by policy in parallel"),
+    AbstractGangTaskWOopQueues("Process referents by policy in parallel",
+      task_queues),
     _task(task),
-    _collector(collector), _span(span), _mark_bit_map(mark_bit_map),
-    _task_queues(task_queues),
-    _term(total_workers, task_queues)
+    _collector(collector), _span(span), _mark_bit_map(mark_bit_map)
   {
     assert(_collector->_span.equals(_span) && !_span.is_empty(),
            "Inconsistency in _span");
   }

-  OopTaskQueueSet* task_queues() { return _task_queues; }
+  OopTaskQueueSet* task_queues() { return queues(); }

   OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
-
-  ParallelTaskTerminator* terminator() { return &_term; }

   void do_work_steal(int i,
                      CMSParDrainMarkingStackClosure* drain,
                      CMSParKeepAliveClosure* keep_alive,
                      int* seed);
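
Note: AbstractGangTaskWOopQueues factors the queue set and terminator out
of individual task proxies, which is why CMSRefProcTaskProxy no longer
needs a worker count at construction. A hedged sketch of the shape that
base class presumably has (constructor and member names are assumptions):

    // Assumed base class: owns the queue set plus a terminator whose
    // size is fixed late, by the gang, via set_for_termination().
    class AbstractGangTaskWOopQueues: public AbstractGangTask {
      OopTaskQueueSet*       _queues;
      ParallelTaskTerminator _terminator;
     public:
      AbstractGangTaskWOopQueues(const char* name, OopTaskQueueSet* queues)
          : AbstractGangTask(name), _queues(queues), _terminator(0, queues) {}
      OopTaskQueueSet*        queues()     { return _queues; }
      ParallelTaskTerminator* terminator() { return &_terminator; }
      virtual void set_for_termination(int active_workers) {
        terminator()->reset_for_reuse(active_workers);
      }
    };
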
@@ -5737 +5761 @@
   // Completely finish any left over work from (an) earlier round(s)
   drain->trim_queue(0);
   size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                                        (size_t)ParGCDesiredObjsFromOverflowList);
   // Now check if there's any work in the overflow list
+  // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
+  // only affects the number of attempts made to get work from the
+  // overflow list and does not affect the number of workers.  Just
+  // pass ParallelGCThreads so this behavior is unchanged.
   if (_collector->par_take_from_overflow_list(num_from_overflow_list,
-                                              work_q)) {
+                                              work_q,
+                                              ParallelGCThreads)) {
     // Found something in global overflow list;
     // not yet ready to go stealing work from others.
     // We'd like to assert(work_q->size() != 0, ...)
     // because we just took work from the overflow list,
     // but of course we can't, since all of that might have
@@ -5771 +5800 @@
 }

 void CMSRefProcTaskExecutor::execute(ProcessTask& task)
 {
   GenCollectedHeap* gch = GenCollectedHeap::heap();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
   assert(workers != NULL, "Need parallel worker threads.");
-  int n_workers = workers->total_workers();
   CMSRefProcTaskProxy rp_task(task, &_collector,
                               _collector.ref_processor()->span(),
                               _collector.markBitMap(),
-                              n_workers, _collector.task_queues());
+                              workers, _collector.task_queues());
   workers->run_task(&rp_task);
 }

 void CMSRefProcTaskExecutor::execute(EnqueueTask& task)
 {

   GenCollectedHeap* gch = GenCollectedHeap::heap();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
   assert(workers != NULL, "Need parallel worker threads.");
   CMSRefEnqueueTaskProxy enq_task(task);
   workers->run_task(&enq_task);
 }

@@ -5812 +5840 @@
                           _span, &_markBitMap, &_markStack,
                           &cmsKeepAliveClosure, false /* !preclean */);
   {
     TraceTime t("weak refs processing", PrintGCDetails, false, gclog_or_tty);
     if (rp->processing_is_mt()) {
+      // Set the degree of MT here.  If the discovery is done MT, there
+      // may have been a different number of threads doing the discovery
+      // and a different number of discovered lists may have Ref objects.
+      // That is OK as long as the Reference lists are balanced (see
+      // balance_all_queues() and balance_queues()).
+
+      rp->set_mt_degree(ParallelGCThreads);
       CMSRefProcTaskExecutor task_executor(*this);
       rp->process_discovered_references(&_is_alive_closure,
                                         &cmsKeepAliveClosure,
                                         &cmsDrainMarkingStackClosure,
                                         &task_executor);
@@ -5872 +5908 @@
   // work queue overflow
   restore_preserved_marks_if_any();  // done single-threaded for now

   rp->set_enqueuing_is_done(true);
   if (rp->processing_is_mt()) {
+    rp->balance_all_queues();
     CMSRefProcTaskExecutor task_executor(*this);
     rp->enqueue_discovered_references(&task_executor);
   } else {
     rp->enqueue_discovered_references(NULL);
   }
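
Note: taken together, the two hunks above make the MT reference-handling
sequence explicit: size the processor with set_mt_degree() before
processing, and call balance_all_queues() before enqueueing so that
discovered lists populated by a different number of discovery threads
are evened out. Condensed from this diff:

    // Condensed call sequence for MT reference handling after this change.
    if (rp->processing_is_mt()) {
      rp->set_mt_degree(ParallelGCThreads);  // workers that will process refs
      CMSRefProcTaskExecutor task_executor(*this);
      rp->process_discovered_references(&_is_alive_closure,
                                        &cmsKeepAliveClosure,
                                        &cmsDrainMarkingStackClosure,
                                        &task_executor);
      // ... later, before enqueueing:
      rp->balance_all_queues();              // even out discovered lists
      rp->enqueue_discovered_references(&task_executor);
    }
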
@@ -8706 +8743 @@
 // Because of the common code, if you make any changes in
 // the code below, please check the ParNew version to see if
 // similar changes might be needed.
 // CR 6797058 has been filed to consolidate the common code.
 bool CMSCollector::par_take_from_overflow_list(size_t num,
-                                               OopTaskQueue* work_q) {
+                                               OopTaskQueue* work_q,
+                                               int no_of_gc_threads) {
   assert(work_q->size() == 0, "First empty local work queue");
   assert(num < work_q->max_elems(), "Can't bite more than we can chew");
   if (_overflow_list == NULL) {
     return false;
   }
   // Grab the entire list; we'll put back a suffix
   oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
   Thread* tid = Thread::current();
-  size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads;
+  // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was
+  // set to ParallelGCThreads.
+  size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads;  // was ParallelGCThreads;
   size_t sleep_time_millis = MAX2((size_t)1, num/100);
   // If the list is busy, we spin for a short while,
   // sleeping between attempts to get the list.
   for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
     os::sleep(tid, sleep_time_millis, false);
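
Note: par_take_from_overflow_list() uses a lock-free hand-off: the whole
overflow list is claimed with an atomic exchange against a BUSY sentinel,
and competitors back off for a bounded number of rounds (now
no_of_gc_threads rather than ParallelGCThreads) waiting for the list to
be released. The claim step, schematically:

    // Schematic of the BUSY-sentinel claim; the real code also splices
    // a retained suffix back onto _overflow_list when it is done.
    oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
    for (size_t spin = 0;
         prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
      os::sleep(tid, sleep_time_millis, false);   // brief backoff
      if (_overflow_list != BUSY) {
        prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);  // retry
      }
    }
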
@@ -9254 +9294 @@
                     true /* recordPostGCusage */,
                     true /* recordAccumulatedGCTime */,
                     true /* recordGCEndTime */,
                     true /* countCollection */ );
 }
-