comparison: src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp @ 1833:8b10f48633dc

6984287: Regularize how GC parallel workers are specified.
Summary: Associate number of GC workers with the workgang as opposed to the task.
Reviewed-by: johnc, ysr

author    jmasa
date      Mon, 20 Sep 2010 14:38:38 -0700
parents   179464550c7d
children  894b1d7c7e01
--- concurrentMarkSweepGeneration.cpp  (1781:97fbf5beff7b)
+++ concurrentMarkSweepGeneration.cpp  (1833:8b10f48633dc)
@@ -193 +193 @@
     assert(UseCompressedOops ||
            junk->prev_addr() == (void*)(oop(junk)->klass_addr()),
            "Offset of FreeChunk::_prev within FreeChunk must match"
            " that of OopDesc::_klass within OopDesc");
   )
-  if (ParallelGCThreads > 0) {
+  if (CollectedHeap::use_parallel_gc_threads()) {
     typedef CMSParGCThreadState* CMSParGCThreadStatePtr;
     _par_gc_thread_states =
       NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads);
     if (_par_gc_thread_states == NULL) {
       vm_exit_during_initialization("Could not allocate par gc structs");
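
Note: the predicate itself is not part of this file. A minimal sketch of
what this diff assumes it to be -- presumably a static member on
CollectedHeap wrapping the old flag test:

    // Sketch only (assumed declaration, e.g. in collectedHeap.hpp):
    // a named predicate replacing the bare "ParallelGCThreads > 0" tests.
    static bool use_parallel_gc_threads() {
      return ParallelGCThreads > 0;
    }
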
@@ -614 +614 @@
     warning("Failed to allocate CMS Revisit Stack");
     return;
   }

   // Support for multi-threaded concurrent phases
-  if (ParallelGCThreads > 0 && CMSConcurrentMTEnabled) {
+  if (CollectedHeap::use_parallel_gc_threads() && CMSConcurrentMTEnabled) {
     if (FLAG_IS_DEFAULT(ConcGCThreads)) {
       // just for now
       FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4);
     }
     if (ConcGCThreads > 1) {
@@ -626 +626 @@
                                  ConcGCThreads, true);
       if (_conc_workers == NULL) {
         warning("GC/CMS: _conc_workers allocation failure: "
                 "forcing -CMSConcurrentMTEnabled");
         CMSConcurrentMTEnabled = false;
+      } else {
+        _conc_workers->initialize_workers();
       }
     } else {
       CMSConcurrentMTEnabled = false;
     }
   }
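
Note: the new else branch defers worker-thread creation to an explicit
initialize_workers() call instead of doing it in the gang constructor. A
hedged sketch of what such an initializer typically does in the workgang
code (allocate_worker() and the os:: calls are illustrative, not quoted
from this changeset):

    // Illustrative sketch of deferred worker creation: the gang only
    // spawns its GangWorker threads when initialize_workers() runs.
    void WorkGang::initialize_workers() {
      _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, total_workers());
      for (int w = 0; w < total_workers(); w++) {
        GangWorker* worker = allocate_worker(w);  // subclass picks the type
        _gang_workers[w] = worker;
        if (worker != NULL && os::create_thread(worker, os::pgc_thread)) {
          os::start_thread(worker);
        }
      }
    }
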
@@ -934 +936 @@
 void ConcurrentMarkSweepGeneration::reset_after_compaction() {
   // Clear the promotion information. These pointers can be adjusted
   // along with all the other pointers into the heap but
   // compaction is expected to be a rare event with
   // a heap using cms so don't do it without seeing the need.
-  if (ParallelGCThreads > 0) {
+  if (CollectedHeap::use_parallel_gc_threads()) {
     for (uint i = 0; i < ParallelGCThreads; i++) {
       _par_gc_thread_states[i]->promo.reset();
     }
   }
 }
@@ -2628 +2630 @@
   bitMapLock()->lock_without_safepoint_check();

   // Should call gc_prologue_work() for all cms gens we are responsible for
   bool registerClosure = _collectorState >= Marking
                          && _collectorState < Sweeping;
-  ModUnionClosure* muc = ParallelGCThreads > 0 ? &_modUnionClosurePar
+  ModUnionClosure* muc = CollectedHeap::use_parallel_gc_threads() ?
+                                                 &_modUnionClosurePar
                                                : &_modUnionClosure;
   _cmsGen->gc_prologue_work(full, registerClosure, muc);
   _permGen->gc_prologue_work(full, registerClosure, muc);

   if (!full) {
@@ -2729 +2732 @@

 void ConcurrentMarkSweepGeneration::gc_epilogue(bool full) {
   collector()->gc_epilogue(full);

   // Also reset promotion tracking in par gc thread states.
-  if (ParallelGCThreads > 0) {
+  if (CollectedHeap::use_parallel_gc_threads()) {
     for (uint i = 0; i < ParallelGCThreads; i++) {
       _par_gc_thread_states[i]->promo.stopTrackingPromotions(i);
     }
   }
 }
@@ -3729 +3732 @@
 };

 // MT Concurrent Marking Task
 class CMSConcMarkingTask: public YieldingFlexibleGangTask {
   CMSCollector* _collector;
-  YieldingFlexibleWorkGang* _workers; // the whole gang
   int           _n_workers;       // requested/desired # workers
   bool          _asynch;
   bool          _result;
   CompactibleFreeListSpace* _cms_space;
   CompactibleFreeListSpace* _perm_space;
@@ -3749 +3751 @@

  public:
   CMSConcMarkingTask(CMSCollector* collector,
                      CompactibleFreeListSpace* cms_space,
                      CompactibleFreeListSpace* perm_space,
-                     bool asynch, int n_workers,
+                     bool asynch,
                      YieldingFlexibleWorkGang* workers,
                      OopTaskQueueSet* task_queues):
     YieldingFlexibleGangTask("Concurrent marking done multi-threaded"),
     _collector(collector),
     _cms_space(cms_space),
     _perm_space(perm_space),
-    _asynch(asynch), _n_workers(n_workers), _result(true),
-    _workers(workers), _task_queues(task_queues),
-    _term(n_workers, task_queues, _collector, asynch),
+    _asynch(asynch), _n_workers(0), _result(true),
+    _task_queues(task_queues),
+    _term(_n_workers, task_queues, _collector, asynch),
     _bit_map_lock(collector->bitMapLock())
   {
-    assert(n_workers <= workers->total_workers(),
-           "Else termination won't work correctly today"); // XXX FIX ME!
-    _requested_size = n_workers;
+    _requested_size = _n_workers;
     _term.set_task(this);
     assert(_cms_space->bottom() < _perm_space->bottom(),
            "Finger incorrectly initialized below");
     _restart_addr = _global_finger = _cms_space->bottom();
   }
@@ -3778 +3778 @@
   OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }

   HeapWord** global_finger_addr() { return &_global_finger; }

   CMSConcMarkingTerminator* terminator() { return &_term; }
+
+  virtual void set_for_termination(int active_workers) {
+    terminator()->reset_for_reuse(active_workers);
+  }

   void work(int i);

   virtual void coordinator_yield();  // stuff done by coordinator
   bool result() { return _result; }
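
Note: set_for_termination() is the hook through which the gang, rather
than the task constructor, now fixes the worker count: when the gang
dispatches the task it reports how many workers will actually run, and
the task resizes its terminator to match. A sketch of the dispatch side
(active_workers() and the surrounding body are assumptions about the
workgang code, not quoted from it):

    // Sketch of the gang side of the handshake: before workers are
    // released onto the task, tell the task how many will participate
    // so its termination protocol is sized correctly.
    void FlexibleWorkGang::run_task(AbstractGangTask* task) {
      task->set_for_termination(active_workers());
      // ... then signal the workers and wait for completion as usual ...
    }
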
@@ -4218 +4222 @@
   int num_workers = ConcGCThreads;

   CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
   CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();

-  CMSConcMarkingTask tsk(this, cms_space, perm_space,
-                         asynch, num_workers /* number requested XXX */,
-                         conc_workers(), task_queues());
+  CMSConcMarkingTask tsk(this,
+                         cms_space,
+                         perm_space,
+                         asynch,
+                         conc_workers(),
+                         task_queues());

   // Since the actual number of workers we get may be different
   // from the number we requested above, do we need to do anything different
   // below? In particular, may be we need to subclass the SequantialSubTasksDone
   // class?? XXX
@@ -4324 +4331 @@
   assert(Thread::current()->is_ConcurrentGC_thread(), "Wrong thread");
   verify_work_stacks_empty();
   verify_overflow_empty();
   _abort_preclean = false;
   if (CMSPrecleaningEnabled) {
+    // Precleaning is currently not MT but the reference processor
+    // may be set for MT.  Disable it temporarily here.
+    ReferenceProcessor* rp = ref_processor();
+    ReferenceProcessorMTProcMutator z(rp, false);
     _eden_chunk_index = 0;
     size_t used = get_eden_used();
     size_t capacity = get_eden_capacity();
     // Don't start sampling unless we will get sufficiently
     // many samples.
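
Note: ReferenceProcessorMTProcMutator acts as a scoped (RAII) switch: MT
reference processing is disabled for the duration of precleaning and
restored when z goes out of scope. A sketch of the shape such a mutator
presumably has (the real class lives with ReferenceProcessor):

    // Assumed save/set/restore idiom: record the current MT setting,
    // install the requested one, restore on destruction.
    class ReferenceProcessorMTProcMutator: StackObj {
      ReferenceProcessor* _rp;
      bool                _saved_mt;
     public:
      ReferenceProcessorMTProcMutator(ReferenceProcessor* rp, bool mt)
          : _rp(rp), _saved_mt(rp->processing_is_mt()) {
        _rp->set_mt_processing(mt);
      }
      ~ReferenceProcessorMTProcMutator() {
        _rp->set_mt_processing(_saved_mt);
      }
    };
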
@@ -4916 +4927 @@
   // A would be collected. Such updates to references in marked objects
   // are detected via the mod union table which is the set of all cards
   // dirtied since the first checkpoint in this GC cycle and prior to
   // the most recent young generation GC, minus those cleaned up by the
   // concurrent precleaning.
-  if (CMSParallelRemarkEnabled && ParallelGCThreads > 0) {
+  if (CMSParallelRemarkEnabled && CollectedHeap::use_parallel_gc_threads()) {
     TraceTime t("Rescan (parallel) ", PrintGCDetails, false, gclog_or_tty);
     do_remark_parallel();
   } else {
     TraceTime t("Rescan (non-parallel) ", PrintGCDetails, false,
                 gclog_or_tty);
@@ -5010 +5021 @@
 }

 // Parallel remark task
 class CMSParRemarkTask: public AbstractGangTask {
   CMSCollector* _collector;
-  WorkGang*     _workers;
   int           _n_workers;
   CompactibleFreeListSpace* _cms_space;
   CompactibleFreeListSpace* _perm_space;

   // The per-thread work queues, available here for stealing.
@@ -5023 +5033 @@

  public:
   CMSParRemarkTask(CMSCollector* collector,
                    CompactibleFreeListSpace* cms_space,
                    CompactibleFreeListSpace* perm_space,
-                   int n_workers, WorkGang* workers,
+                   int n_workers, FlexibleWorkGang* workers,
                    OopTaskQueueSet* task_queues):
     AbstractGangTask("Rescan roots and grey objects in parallel"),
     _collector(collector),
     _cms_space(cms_space), _perm_space(perm_space),
     _n_workers(n_workers),
-    _workers(workers),
     _task_queues(task_queues),
-    _term(workers->total_workers(), task_queues) { }
+    _term(n_workers, task_queues) { }

   OopTaskQueueSet* task_queues() { return _task_queues; }

   OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }

   ParallelTaskTerminator* terminator() { return &_term; }
+  int n_workers() { return _n_workers; }

   void work(int i);

  private:
   // Work method in support of parallel rescan ... of young gen spaces
@@ -5054 +5064 @@
                              Par_MarkRefsIntoAndScanClosure* cl);

   // ... work stealing for the above
   void do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, int* seed);
 };
+
+// work_queue(i) is passed to the closure
+// Par_MarkRefsIntoAndScanClosure.  The "i" parameter
+// also is passed to do_dirty_card_rescan_tasks() and to
+// do_work_steal() to select the i-th task_queue.

 void CMSParRemarkTask::work(int i) {
   elapsedTimer _timer;
   ResourceMark rm;
   HandleMark   hm;
@@ -5126 +5141 @@
   _timer.reset();
   _timer.start();

   // Do the rescan tasks for each of the two spaces
   // (cms_space and perm_space) in turn.
+  // "i" is passed to select the "i-th" task_queue
   do_dirty_card_rescan_tasks(_cms_space, i, &par_mrias_cl);
   do_dirty_card_rescan_tasks(_perm_space, i, &par_mrias_cl);
   _timer.stop();
   if (PrintCMSStatistics != 0) {
     gclog_or_tty->print_cr(
5148 "Finished work stealing in %dth thread: %3.3f sec", 5164 "Finished work stealing in %dth thread: %3.3f sec",
5149 i, _timer.seconds()); 5165 i, _timer.seconds());
5150 } 5166 }
5151 } 5167 }
5152 5168
5169 // Note that parameter "i" is not used.
5153 void 5170 void
5154 CMSParRemarkTask::do_young_space_rescan(int i, 5171 CMSParRemarkTask::do_young_space_rescan(int i,
5155 Par_MarkRefsIntoAndScanClosure* cl, ContiguousSpace* space, 5172 Par_MarkRefsIntoAndScanClosure* cl, ContiguousSpace* space,
5156 HeapWord** chunk_array, size_t chunk_top) { 5173 HeapWord** chunk_array, size_t chunk_top) {
5157 // Until all tasks completed: 5174 // Until all tasks completed:
@@ -5307 +5324 @@
   // Completely finish any left over work from (an) earlier round(s)
   cl->trim_queue(0);
   size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                                        (size_t)ParGCDesiredObjsFromOverflowList);
   // Now check if there's any work in the overflow list
+  // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
+  // only affects the number of attempts made to get work from the
+  // overflow list and does not affect the number of workers.  Just
+  // pass ParallelGCThreads so this behavior is unchanged.
   if (_collector->par_take_from_overflow_list(num_from_overflow_list,
-                                              work_q)) {
+                                              work_q,
+                                              ParallelGCThreads)) {
     // found something in global overflow list;
     // not yet ready to go stealing work from others.
     // We'd like to assert(work_q->size() != 0, ...)
     // because we just took work from the overflow list,
     // but of course we can't since all of that could have
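
Note: this hunk sits inside CMSParRemarkTask::do_work_steal(), which
follows the usual CMS pattern: drain the local queue, refill from the
global overflow list, steal from peers, and only offer termination when
all three come up empty. A condensed sketch of that control flow (the
real body also checks for aborts and yields):

    // Condensed sketch of the work-stealing loop around this hunk.
    while (true) {
      cl->trim_queue(0);                       // drain local work fully
      size_t num = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                        (size_t)ParGCDesiredObjsFromOverflowList);
      if (_collector->par_take_from_overflow_list(num, work_q,
                                                  ParallelGCThreads)) {
        continue;                              // got global overflow work
      }
      oop obj_to_scan;
      if (task_queues()->steal(i, seed, /* reference */ obj_to_scan)) {
        obj_to_scan->oop_iterate(cl);          // scan the stolen object
      } else if (terminator()->offer_termination()) {
        break;                                 // every worker is idle
      }
    }
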
@@ -5365 +5387 @@
 }

 // Merge the per-thread plab arrays into the global survivor chunk
 // array which will provide the partitioning of the survivor space
 // for CMS rescan.
-void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) {
+void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv,
+                                              int no_of_gc_threads) {
   assert(_survivor_plab_array  != NULL, "Error");
   assert(_survivor_chunk_array != NULL, "Error");
   assert(_collectorState == FinalMarking, "Error");
-  for (uint j = 0; j < ParallelGCThreads; j++) {
+  for (int j = 0; j < no_of_gc_threads; j++) {
     _cursor[j] = 0;
   }
   HeapWord* top = surv->top();
   size_t i;
   for (i = 0; i < _survivor_chunk_capacity; i++) {  // all sca entries
     HeapWord* min_val = top;          // Higher than any PLAB address
     uint      min_tid = 0;            // position of min_val this round
-    for (uint j = 0; j < ParallelGCThreads; j++) {
+    for (int j = 0; j < no_of_gc_threads; j++) {
       ChunkArray* cur_sca = &_survivor_plab_array[j];
       if (_cursor[j] == cur_sca->end()) {
         continue;
       }
       assert(_cursor[j] < cur_sca->end(), "ctl pt invariant");
@@ -5411 +5434 @@
     gclog_or_tty->print(" (Survivor:" SIZE_FORMAT "chunks) ", i);
   }
   // Verify that we used up all the recorded entries
   #ifdef ASSERT
   size_t total = 0;
-  for (uint j = 0; j < ParallelGCThreads; j++) {
+  for (int j = 0; j < no_of_gc_threads; j++) {
     assert(_cursor[j] == _survivor_plab_array[j].end(), "Ctl pt invariant");
     total += _cursor[j];
   }
   assert(total == _survivor_chunk_index, "Ctl Pt Invariant");
   // Check that the merged array is in sorted order
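
Note: merge_survivor_plab_arrays() is a k-way merge. Each GC thread
recorded a sorted ChunkArray of PLAB boundaries; a per-thread cursor
walks the arrays, and each round picks the global minimum. The selection
step, schematically (accessor names follow the surrounding code; treat
this as a sketch, not the verbatim body):

    // One selection round of the k-way merge: find the smallest
    // not-yet-consumed address across all per-thread arrays, emit it,
    // and advance that thread's cursor.
    HeapWord* min_val = top;   // sentinel above every PLAB address
    uint      min_tid = 0;
    for (int j = 0; j < no_of_gc_threads; j++) {
      if (_cursor[j] < _survivor_plab_array[j].end()) {
        HeapWord* cur = _survivor_plab_array[j].nth(_cursor[j]);
        if (cur < min_val) { min_val = cur; min_tid = j; }
      }
    }
    _survivor_chunk_array[i] = min_val;
    _cursor[min_tid]++;
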
@@ -5446 +5469 @@
     SequentialSubTasksDone* pst = dng->eden()->par_seq_tasks();
     assert(!pst->valid(), "Clobbering existing data?");
     // Each valid entry in [0, _eden_chunk_index) represents a task.
     size_t n_tasks = _eden_chunk_index + 1;
     assert(n_tasks == 1 || _eden_chunk_array != NULL, "Error");
-    pst->set_par_threads(n_threads);
+    // Sets the condition for completion of the subtask (how many threads
+    // need to finish in order to be done).
+    pst->set_n_threads(n_threads);
     pst->set_n_tasks((int)n_tasks);
   }

   // Merge the survivor plab arrays into _survivor_chunk_array
   if (_survivor_plab_array != NULL) {
-    merge_survivor_plab_arrays(dng->from());
+    merge_survivor_plab_arrays(dng->from(), n_threads);
   } else {
     assert(_survivor_chunk_index == 0, "Error");
   }

   // To space
   {
     SequentialSubTasksDone* pst = dng->to()->par_seq_tasks();
     assert(!pst->valid(), "Clobbering existing data?");
-    pst->set_par_threads(n_threads);
+    // Sets the condition for completion of the subtask (how many threads
+    // need to finish in order to be done).
+    pst->set_n_threads(n_threads);
     pst->set_n_tasks(1);
     assert(pst->valid(), "Error");
   }

   // From space
   {
     SequentialSubTasksDone* pst = dng->from()->par_seq_tasks();
     assert(!pst->valid(), "Clobbering existing data?");
     size_t n_tasks = _survivor_chunk_index + 1;
     assert(n_tasks == 1 || _survivor_chunk_array != NULL, "Error");
-    pst->set_par_threads(n_threads);
+    // Sets the condition for completion of the subtask (how many threads
+    // need to finish in order to be done).
+    pst->set_n_threads(n_threads);
     pst->set_n_tasks((int)n_tasks);
     assert(pst->valid(), "Error");
   }
 }

 // Parallel version of remark
 void CMSCollector::do_remark_parallel() {
   GenCollectedHeap* gch = GenCollectedHeap::heap();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
   assert(workers != NULL, "Need parallel worker threads.");
   int n_workers = workers->total_workers();
   CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
   CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();

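
Note: the rename from set_par_threads() to set_n_threads() matches the
SequentialSubTasksDone contract this change settles on: set_n_threads()
is the completion condition (how many workers must check in before the
subtask set is done), while set_n_tasks() sizes the claimable work. A
sketch of the claiming idiom a worker then uses (as elsewhere in this
file):

    // Workers race to claim task indices; the last of the n_threads
    // workers to call all_tasks_completed() retires the task set.
    SequentialSubTasksDone* pst = space->par_seq_tasks();
    int nth_task = 0;
    while (!pst->is_task_claimed(/* reference */ nth_task)) {
      // ... rescan the chunk of the space that task nth_task covers ...
    }
    pst->all_tasks_completed();
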
@@ -5634 +5663 @@
 }

 ////////////////////////////////////////////////////////
 // Parallel Reference Processing Task Proxy Class
 ////////////////////////////////////////////////////////
-class CMSRefProcTaskProxy: public AbstractGangTask {
+class CMSRefProcTaskProxy: public AbstractGangTaskWOopQueues {
   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
   CMSCollector*          _collector;
   CMSBitMap*             _mark_bit_map;
   const MemRegion        _span;
-  OopTaskQueueSet*       _task_queues;
-  ParallelTaskTerminator _term;
   ProcessTask&           _task;

 public:
   CMSRefProcTaskProxy(ProcessTask&     task,
                       CMSCollector*    collector,
                       const MemRegion& span,
                       CMSBitMap*       mark_bit_map,
-                      int              total_workers,
+                      AbstractWorkGang* workers,
                       OopTaskQueueSet* task_queues):
-    AbstractGangTask("Process referents by policy in parallel"),
+    AbstractGangTaskWOopQueues("Process referents by policy in parallel",
+      task_queues),
     _task(task),
-    _collector(collector), _span(span), _mark_bit_map(mark_bit_map),
-    _task_queues(task_queues),
-    _term(total_workers, task_queues)
+    _collector(collector), _span(span), _mark_bit_map(mark_bit_map)
   {
     assert(_collector->_span.equals(_span) && !_span.is_empty(),
            "Inconsistency in _span");
   }

-  OopTaskQueueSet* task_queues() { return _task_queues; }
+  OopTaskQueueSet* task_queues() { return queues(); }

   OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
-
-  ParallelTaskTerminator* terminator() { return &_term; }

   void do_work_steal(int i,
                      CMSParDrainMarkingStackClosure* drain,
                      CMSParKeepAliveClosure* keep_alive,
                      int* seed);
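
Note: AbstractGangTaskWOopQueues factors the queue set and terminator out
of individual task proxies, which is why CMSRefProcTaskProxy no longer
needs a worker count at construction. A hedged sketch of the shape that
base class presumably has (constructor and member names are assumptions):

    // Assumed base class: owns the queue set plus a terminator whose
    // size is fixed late, by the gang, via set_for_termination().
    class AbstractGangTaskWOopQueues: public AbstractGangTask {
      OopTaskQueueSet*       _queues;
      ParallelTaskTerminator _terminator;
     public:
      AbstractGangTaskWOopQueues(const char* name, OopTaskQueueSet* queues)
          : AbstractGangTask(name), _queues(queues), _terminator(0, queues) {}
      OopTaskQueueSet*        queues()     { return _queues; }
      ParallelTaskTerminator* terminator() { return &_terminator; }
      virtual void set_for_termination(int active_workers) {
        terminator()->reset_for_reuse(active_workers);
      }
    };
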
@@ -5737 +5761 @@
   // Completely finish any left over work from (an) earlier round(s)
   drain->trim_queue(0);
   size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                                        (size_t)ParGCDesiredObjsFromOverflowList);
   // Now check if there's any work in the overflow list
+  // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
+  // only affects the number of attempts made to get work from the
+  // overflow list and does not affect the number of workers.  Just
+  // pass ParallelGCThreads so this behavior is unchanged.
   if (_collector->par_take_from_overflow_list(num_from_overflow_list,
-                                              work_q)) {
+                                              work_q,
+                                              ParallelGCThreads)) {
     // Found something in global overflow list;
     // not yet ready to go stealing work from others.
     // We'd like to assert(work_q->size() != 0, ...)
     // because we just took work from the overflow list,
     // but of course we can't, since all of that might have
@@ -5771 +5800 @@
 }

 void CMSRefProcTaskExecutor::execute(ProcessTask& task)
 {
   GenCollectedHeap* gch = GenCollectedHeap::heap();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
   assert(workers != NULL, "Need parallel worker threads.");
-  int n_workers = workers->total_workers();
   CMSRefProcTaskProxy rp_task(task, &_collector,
                               _collector.ref_processor()->span(),
                               _collector.markBitMap(),
-                              n_workers, _collector.task_queues());
+                              workers, _collector.task_queues());
   workers->run_task(&rp_task);
 }

 void CMSRefProcTaskExecutor::execute(EnqueueTask& task)
 {

   GenCollectedHeap* gch = GenCollectedHeap::heap();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
   assert(workers != NULL, "Need parallel worker threads.");
   CMSRefEnqueueTaskProxy enq_task(task);
   workers->run_task(&enq_task);
 }

@@ -5812 +5840 @@
                           _span, &_markBitMap, &_markStack,
                           &cmsKeepAliveClosure, false /* !preclean */);
   {
     TraceTime t("weak refs processing", PrintGCDetails, false, gclog_or_tty);
     if (rp->processing_is_mt()) {
+      // Set the degree of MT here.  If the discovery is done MT, there
+      // may have been a different number of threads doing the discovery
+      // and a different number of discovered lists may have Ref objects.
+      // That is OK as long as the Reference lists are balanced (see
+      // balance_all_queues() and balance_queues()).
+
+      rp->set_mt_degree(ParallelGCThreads);
       CMSRefProcTaskExecutor task_executor(*this);
       rp->process_discovered_references(&_is_alive_closure,
                                         &cmsKeepAliveClosure,
                                         &cmsDrainMarkingStackClosure,
                                         &task_executor);
@@ -5872 +5908 @@
   // work queue overflow
   restore_preserved_marks_if_any();  // done single-threaded for now

   rp->set_enqueuing_is_done(true);
   if (rp->processing_is_mt()) {
+    rp->balance_all_queues();
     CMSRefProcTaskExecutor task_executor(*this);
     rp->enqueue_discovered_references(&task_executor);
   } else {
     rp->enqueue_discovered_references(NULL);
   }
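
Note: taken together, the two hunks above make the MT reference-handling
sequence explicit: size the processor with set_mt_degree() before
processing, and call balance_all_queues() before enqueueing so that
discovered lists populated by a different number of discovery threads
are evened out. Condensed from this diff:

    // Condensed call sequence for MT reference handling after this change.
    if (rp->processing_is_mt()) {
      rp->set_mt_degree(ParallelGCThreads);  // workers that will process refs
      CMSRefProcTaskExecutor task_executor(*this);
      rp->process_discovered_references(&_is_alive_closure,
                                        &cmsKeepAliveClosure,
                                        &cmsDrainMarkingStackClosure,
                                        &task_executor);
      // ... later, before enqueueing:
      rp->balance_all_queues();              // even out discovered lists
      rp->enqueue_discovered_references(&task_executor);
    }
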
@@ -8706 +8743 @@
 // Because of the common code, if you make any changes in
 // the code below, please check the ParNew version to see if
 // similar changes might be needed.
 // CR 6797058 has been filed to consolidate the common code.
 bool CMSCollector::par_take_from_overflow_list(size_t num,
-                                               OopTaskQueue* work_q) {
+                                               OopTaskQueue* work_q,
+                                               int no_of_gc_threads) {
   assert(work_q->size() == 0, "First empty local work queue");
   assert(num < work_q->max_elems(), "Can't bite more than we can chew");
   if (_overflow_list == NULL) {
     return false;
   }
   // Grab the entire list; we'll put back a suffix
   oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
   Thread* tid = Thread::current();
-  size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads;
+  // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was
+  // set to ParallelGCThreads.
+  size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads;  // was ParallelGCThreads;
   size_t sleep_time_millis = MAX2((size_t)1, num/100);
   // If the list is busy, we spin for a short while,
   // sleeping between attempts to get the list.
   for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
     os::sleep(tid, sleep_time_millis, false);
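
Note: par_take_from_overflow_list() uses a lock-free hand-off: the whole
overflow list is claimed with an atomic exchange against a BUSY sentinel,
and competitors back off for a bounded number of rounds (now
no_of_gc_threads rather than ParallelGCThreads) waiting for the list to
be released. The claim step, schematically:

    // Schematic of the BUSY-sentinel claim; the real code also splices
    // a retained suffix back onto _overflow_list when it is done.
    oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
    for (size_t spin = 0;
         prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
      os::sleep(tid, sleep_time_millis, false);   // brief backoff
      if (_overflow_list != BUSY) {
        prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);  // retry
      }
    }
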
@@ -9254 +9294 @@
                     true /* recordPostGCusage */,
                     true /* recordAccumulatedGCTime */,
                     true /* recordGCEndTime */,
                     true /* countCollection */ );
 }
-