# HG changeset patch # User jcoomes # Date 1286555349 25200 # Node ID 0715f0cf171d3ef573cd75a098b15825ed6ccec3 # Parent 22e4420d19f7d600bcb28f4d68a16f3a084ac7f0# Parent 6e0aac35bfa97bb4cf59968a26d8a66f98a16981 Merge diff -r 22e4420d19f7 -r 0715f0cf171d make/solaris/makefiles/sparcWorks.make --- a/make/solaris/makefiles/sparcWorks.make Wed Oct 06 14:18:32 2010 -0700 +++ b/make/solaris/makefiles/sparcWorks.make Fri Oct 08 09:29:09 2010 -0700 @@ -51,9 +51,9 @@ VALIDATED_COMPILER_REVS := 5.8 VALIDATED_C_COMPILER_REVS := 5.8 else - # Validated compilers for JDK7 are SS12 (5.9) or SS12 update 1 (5.10) - VALIDATED_COMPILER_REVS := 5.9 5.10 - VALIDATED_C_COMPILER_REVS := 5.9 5.10 + # Validated compiler for JDK7 is SS12 update 1 + patches (5.10) + VALIDATED_COMPILER_REVS := 5.10 + VALIDATED_C_COMPILER_REVS := 5.10 endif # Warning messages about not using the above validated versions diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/concurrentMarkSweep/cmsCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsCollectorPolicy.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsCollectorPolicy.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,7 +39,7 @@ if (_generations == NULL) vm_exit_during_initialization("Unable to allocate gen spec"); - if (UseParNewGC && ParallelGCThreads > 0) { + if (ParNewGeneration::in_use()) { if (UseAdaptiveSizePolicy) { _generations[0] = new GenerationSpec(Generation::ASParNew, _initial_gen0_size, _max_gen0_size); @@ -79,7 +79,7 @@ void ConcurrentMarkSweepPolicy::initialize_gc_policy_counters() { // initialize the policy counters - 2 collectors, 3 generations - if (UseParNewGC && ParallelGCThreads > 0) { + if (ParNewGeneration::in_use()) { _gc_policy_counters = new GCPolicyCounters("ParNew:CMS", 2, 3); } else { @@ -102,7 +102,7 @@ assert(size_policy() != NULL, "A size policy is required"); // initialize the policy counters - 2 collectors, 3 generations - if (UseParNewGC && ParallelGCThreads > 0) { + if (ParNewGeneration::in_use()) { _gc_policy_counters = new CMSGCAdaptivePolicyCounters("ParNew:CMS", 2, 3, size_policy()); } diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -50,6 +50,18 @@ } } +HeapWord* CMSPermGen::request_expand_and_allocate(Generation* gen, + size_t size, + GCCause::Cause prev_cause /* ignored */) { + HeapWord* obj = gen->expand_and_allocate(size, false); + if (gen->capacity() >= _capacity_expansion_limit) { + set_capacity_expansion_limit(gen->capacity() + MaxPermHeapExpansion); + assert(((ConcurrentMarkSweepGeneration*)gen)->should_concurrent_collect(), + "Should kick off a collection if one not in progress"); + } + return obj; +} + void CMSPermGen::compute_new_size() { _gen->compute_new_size(); } diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.hpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -33,6 +33,10 @@ // The "generation" view. ConcurrentMarkSweepGeneration* _gen; + // Override default implementation from PermGen + virtual HeapWord* request_expand_and_allocate(Generation* gen, size_t size, + GCCause::Cause prev_cause); + public: CMSPermGen(ReservedSpace rs, size_t initial_byte_size, CardTableRS* ct, FreeBlockDictionary::DictionaryChoice); diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -124,7 +124,8 @@ checkFreeListConsistency(); // Initialize locks for parallel case. - if (ParallelGCThreads > 0) { + + if (CollectedHeap::use_parallel_gc_threads()) { for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) { _indexedFreeListParLocks[i] = new Mutex(Mutex::leaf - 1, // == ExpandHeap_lock - 1 "a freelist par lock", @@ -1071,7 +1072,8 @@ // at address below "p" in finding the object that contains "p" // and those objects (if garbage) may have been modified to hold // live range information. - // assert(ParallelGCThreads > 0 || _bt.block_start(p) == p, "Should be a block boundary"); + // assert(CollectedHeap::use_parallel_gc_threads() || _bt.block_start(p) == p, + // "Should be a block boundary"); if (FreeChunk::indicatesFreeChunk(p)) return false; klassOop k = oop(p)->klass_or_null(); if (k != NULL) { @@ -2932,7 +2934,9 @@ "n_tasks calculation incorrect"); SequentialSubTasksDone* pst = conc_par_seq_tasks(); assert(!pst->valid(), "Clobbering existing data?"); - pst->set_par_threads(n_threads); + // Sets the condition for completion of the subtask (how many threads + // need to finish in order to be done). + pst->set_n_threads(n_threads); pst->set_n_tasks((int)n_tasks); } @@ -2972,6 +2976,8 @@ "n_tasks calculation incorrect"); SequentialSubTasksDone* pst = conc_par_seq_tasks(); assert(!pst->valid(), "Clobbering existing data?"); - pst->set_par_threads(n_threads); + // Sets the condition for completion of the subtask (how many threads + // need to finish in order to be done). + pst->set_n_threads(n_threads); pst->set_n_tasks((int)n_tasks); } diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -195,7 +195,7 @@ "Offset of FreeChunk::_prev within FreeChunk must match" " that of OopDesc::_klass within OopDesc"); ) - if (ParallelGCThreads > 0) { + if (CollectedHeap::use_parallel_gc_threads()) { typedef CMSParGCThreadState* CMSParGCThreadStatePtr; _par_gc_thread_states = NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads); @@ -540,8 +540,6 @@ _is_alive_closure(_span, &_markBitMap), _restart_addr(NULL), _overflow_list(NULL), - _preserved_oop_stack(NULL), - _preserved_mark_stack(NULL), _stats(cmsGen), _eden_chunk_array(NULL), // may be set in ctor body _eden_chunk_capacity(0), // -- ditto -- @@ -616,7 +614,7 @@ } // Support for multi-threaded concurrent phases - if (ParallelGCThreads > 0 && CMSConcurrentMTEnabled) { + if (CollectedHeap::use_parallel_gc_threads() && CMSConcurrentMTEnabled) { if (FLAG_IS_DEFAULT(ConcGCThreads)) { // just for now FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4); @@ -628,6 +626,8 @@ warning("GC/CMS: _conc_workers allocation failure: " "forcing -CMSConcurrentMTEnabled"); CMSConcurrentMTEnabled = false; + } else { + _conc_workers->initialize_workers(); } } else { CMSConcurrentMTEnabled = false; @@ -936,7 +936,7 @@ // along with all the other pointers into the heap but // compaction is expected to be a rare event with // a heap using cms so don't do it without seeing the need. - if (ParallelGCThreads > 0) { + if (CollectedHeap::use_parallel_gc_threads()) { for (uint i = 0; i < ParallelGCThreads; i++) { _par_gc_thread_states[i]->promo.reset(); } @@ -2630,7 +2630,8 @@ // Should call gc_prologue_work() for all cms gens we are responsible for bool registerClosure = _collectorState >= Marking && _collectorState < Sweeping; - ModUnionClosure* muc = ParallelGCThreads > 0 ? &_modUnionClosurePar + ModUnionClosure* muc = CollectedHeap::use_parallel_gc_threads() ? + &_modUnionClosurePar : &_modUnionClosure; _cmsGen->gc_prologue_work(full, registerClosure, muc); _permGen->gc_prologue_work(full, registerClosure, muc); @@ -2731,7 +2732,7 @@ collector()->gc_epilogue(full); // Also reset promotion tracking in par gc thread states. - if (ParallelGCThreads > 0) { + if (CollectedHeap::use_parallel_gc_threads()) { for (uint i = 0; i < ParallelGCThreads; i++) { _par_gc_thread_states[i]->promo.stopTrackingPromotions(i); } @@ -3263,6 +3264,7 @@ ConcurrentMarkSweepGeneration::expand_and_allocate(size_t word_size, bool tlab, bool parallel) { + CMSSynchronousYieldRequest yr; assert(!tlab, "Can't deal with TLAB allocation"); MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag); expand(word_size*HeapWordSize, MinHeapDeltaBytes, @@ -3709,20 +3711,26 @@ class CMSConcMarkingTerminator: public ParallelTaskTerminator { CMSCollector* _collector; CMSConcMarkingTask* _task; - bool _yield; - protected: + public: virtual void yield(); - public: + // "n_threads" is the number of threads to be terminated. // "queue_set" is a set of work queues of other threads. // "collector" is the CMS collector associated with this task terminator. // "yield" indicates whether we need the gang as a whole to yield. - CMSConcMarkingTerminator(int n_threads, TaskQueueSetSuper* queue_set, - CMSCollector* collector, bool yield) : + CMSConcMarkingTerminator(int n_threads, TaskQueueSetSuper* queue_set, CMSCollector* collector) : ParallelTaskTerminator(n_threads, queue_set), - _collector(collector), - _yield(yield) { } - + _collector(collector) { } + + void set_task(CMSConcMarkingTask* task) { + _task = task; + } +}; + +class CMSConcMarkingTerminatorTerminator: public TerminatorTerminator { + CMSConcMarkingTask* _task; + public: + bool should_exit_termination(); void set_task(CMSConcMarkingTask* task) { _task = task; } @@ -3731,13 +3739,14 @@ // MT Concurrent Marking Task class CMSConcMarkingTask: public YieldingFlexibleGangTask { CMSCollector* _collector; - YieldingFlexibleWorkGang* _workers; // the whole gang int _n_workers; // requested/desired # workers bool _asynch; bool _result; CompactibleFreeListSpace* _cms_space; CompactibleFreeListSpace* _perm_space; - HeapWord* _global_finger; + char _pad_front[64]; // padding to ... + HeapWord* _global_finger; // ... avoid sharing cache line + char _pad_back[64]; HeapWord* _restart_addr; // Exposed here for yielding support @@ -3745,28 +3754,30 @@ // The per thread work queues, available here for stealing OopTaskQueueSet* _task_queues; + + // Termination (and yielding) support CMSConcMarkingTerminator _term; + CMSConcMarkingTerminatorTerminator _term_term; public: CMSConcMarkingTask(CMSCollector* collector, CompactibleFreeListSpace* cms_space, CompactibleFreeListSpace* perm_space, - bool asynch, int n_workers, + bool asynch, YieldingFlexibleWorkGang* workers, OopTaskQueueSet* task_queues): YieldingFlexibleGangTask("Concurrent marking done multi-threaded"), _collector(collector), _cms_space(cms_space), _perm_space(perm_space), - _asynch(asynch), _n_workers(n_workers), _result(true), - _workers(workers), _task_queues(task_queues), - _term(n_workers, task_queues, _collector, asynch), + _asynch(asynch), _n_workers(0), _result(true), + _task_queues(task_queues), + _term(_n_workers, task_queues, _collector), _bit_map_lock(collector->bitMapLock()) { - assert(n_workers <= workers->total_workers(), - "Else termination won't work correctly today"); // XXX FIX ME! - _requested_size = n_workers; + _requested_size = _n_workers; _term.set_task(this); + _term_term.set_task(this); assert(_cms_space->bottom() < _perm_space->bottom(), "Finger incorrectly initialized below"); _restart_addr = _global_finger = _cms_space->bottom(); @@ -3781,7 +3792,16 @@ CMSConcMarkingTerminator* terminator() { return &_term; } + virtual void set_for_termination(int active_workers) { + terminator()->reset_for_reuse(active_workers); + } + void work(int i); + bool should_yield() { + return ConcurrentMarkSweepThread::should_yield() + && !_collector->foregroundGCIsActive() + && _asynch; + } virtual void coordinator_yield(); // stuff done by coordinator bool result() { return _result; } @@ -3803,10 +3823,17 @@ void bump_global_finger(HeapWord* f); }; +bool CMSConcMarkingTerminatorTerminator::should_exit_termination() { + assert(_task != NULL, "Error"); + return _task->yielding(); + // Note that we do not need the disjunct || _task->should_yield() above + // because we want terminating threads to yield only if the task + // is already in the midst of yielding, which happens only after at least one + // thread has yielded. +} + void CMSConcMarkingTerminator::yield() { - if (ConcurrentMarkSweepThread::should_yield() && - !_collector->foregroundGCIsActive() && - _yield) { + if (_task->should_yield()) { _task->yield(); } else { ParallelTaskTerminator::yield(); @@ -4031,6 +4058,7 @@ class Par_ConcMarkingClosure: public Par_KlassRememberingOopClosure { private: + CMSConcMarkingTask* _task; MemRegion _span; CMSBitMap* _bit_map; CMSMarkStack* _overflow_stack; @@ -4038,11 +4066,12 @@ protected: DO_OOP_WORK_DEFN public: - Par_ConcMarkingClosure(CMSCollector* collector, OopTaskQueue* work_queue, + Par_ConcMarkingClosure(CMSCollector* collector, CMSConcMarkingTask* task, OopTaskQueue* work_queue, CMSBitMap* bit_map, CMSMarkStack* overflow_stack, CMSMarkStack* revisit_stack): Par_KlassRememberingOopClosure(collector, NULL, revisit_stack), - _span(_collector->_span), + _task(task), + _span(collector->_span), _work_queue(work_queue), _bit_map(bit_map), _overflow_stack(overflow_stack) @@ -4051,6 +4080,11 @@ virtual void do_oop(narrowOop* p); void trim_queue(size_t max); void handle_stack_overflow(HeapWord* lost); + void do_yield_check() { + if (_task->should_yield()) { + _task->yield(); + } + } }; // Grey object scanning during work stealing phase -- @@ -4094,6 +4128,7 @@ handle_stack_overflow(addr); } } // Else, some other thread got there first + do_yield_check(); } } @@ -4109,6 +4144,7 @@ assert(_span.contains((HeapWord*)new_oop), "Not in span"); assert(new_oop->is_parsable(), "Should be parsable"); new_oop->oop_iterate(this); // do_oop() above + do_yield_check(); } } } @@ -4136,7 +4172,7 @@ CMSMarkStack* ovflw = &(_collector->_markStack); CMSMarkStack* revisit = &(_collector->_revisitStack); int* seed = _collector->hash_seed(i); - Par_ConcMarkingClosure cl(_collector, work_q, bm, ovflw, revisit); + Par_ConcMarkingClosure cl(_collector, this, work_q, bm, ovflw, revisit); while (true) { cl.trim_queue(0); assert(work_q->size() == 0, "Should have been emptied above"); @@ -4149,9 +4185,11 @@ assert(obj_to_scan->is_oop(), "Should be an oop"); assert(bm->isMarked((HeapWord*)obj_to_scan), "Grey object"); obj_to_scan->oop_iterate(&cl); - } else if (terminator()->offer_termination()) { + } else if (terminator()->offer_termination(&_term_term)) { assert(work_q->size() == 0, "Impossible!"); break; + } else if (yielding() || should_yield()) { + yield(); } } } @@ -4220,9 +4258,12 @@ CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace(); CompactibleFreeListSpace* perm_space = _permGen->cmsSpace(); - CMSConcMarkingTask tsk(this, cms_space, perm_space, - asynch, num_workers /* number requested XXX */, - conc_workers(), task_queues()); + CMSConcMarkingTask tsk(this, + cms_space, + perm_space, + asynch, + conc_workers(), + task_queues()); // Since the actual number of workers we get may be different // from the number we requested above, do we need to do anything different @@ -4326,6 +4367,10 @@ verify_overflow_empty(); _abort_preclean = false; if (CMSPrecleaningEnabled) { + // Precleaning is currently not MT but the reference processor + // may be set for MT. Disable it temporarily here. + ReferenceProcessor* rp = ref_processor(); + ReferenceProcessorMTProcMutator z(rp, false); _eden_chunk_index = 0; size_t used = get_eden_used(); size_t capacity = get_eden_capacity(); @@ -4918,7 +4963,7 @@ // dirtied since the first checkpoint in this GC cycle and prior to // the most recent young generation GC, minus those cleaned up by the // concurrent precleaning. - if (CMSParallelRemarkEnabled && ParallelGCThreads > 0) { + if (CMSParallelRemarkEnabled && CollectedHeap::use_parallel_gc_threads()) { TraceTime t("Rescan (parallel) ", PrintGCDetails, false, gclog_or_tty); do_remark_parallel(); } else { @@ -5012,7 +5057,6 @@ // Parallel remark task class CMSParRemarkTask: public AbstractGangTask { CMSCollector* _collector; - WorkGang* _workers; int _n_workers; CompactibleFreeListSpace* _cms_space; CompactibleFreeListSpace* _perm_space; @@ -5025,21 +5069,21 @@ CMSParRemarkTask(CMSCollector* collector, CompactibleFreeListSpace* cms_space, CompactibleFreeListSpace* perm_space, - int n_workers, WorkGang* workers, + int n_workers, FlexibleWorkGang* workers, OopTaskQueueSet* task_queues): AbstractGangTask("Rescan roots and grey objects in parallel"), _collector(collector), _cms_space(cms_space), _perm_space(perm_space), _n_workers(n_workers), - _workers(workers), _task_queues(task_queues), - _term(workers->total_workers(), task_queues) { } + _term(n_workers, task_queues) { } OopTaskQueueSet* task_queues() { return _task_queues; } OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); } ParallelTaskTerminator* terminator() { return &_term; } + int n_workers() { return _n_workers; } void work(int i); @@ -5057,6 +5101,11 @@ void do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, int* seed); }; +// work_queue(i) is passed to the closure +// Par_MarkRefsIntoAndScanClosure. The "i" parameter +// also is passed to do_dirty_card_rescan_tasks() and to +// do_work_steal() to select the i-th task_queue. + void CMSParRemarkTask::work(int i) { elapsedTimer _timer; ResourceMark rm; @@ -5128,6 +5177,7 @@ // Do the rescan tasks for each of the two spaces // (cms_space and perm_space) in turn. + // "i" is passed to select the "i-th" task_queue do_dirty_card_rescan_tasks(_cms_space, i, &par_mrias_cl); do_dirty_card_rescan_tasks(_perm_space, i, &par_mrias_cl); _timer.stop(); @@ -5150,6 +5200,7 @@ } } +// Note that parameter "i" is not used. void CMSParRemarkTask::do_young_space_rescan(int i, Par_MarkRefsIntoAndScanClosure* cl, ContiguousSpace* space, @@ -5309,8 +5360,13 @@ size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4, (size_t)ParGCDesiredObjsFromOverflowList); // Now check if there's any work in the overflow list + // Passing ParallelGCThreads as the third parameter, no_of_gc_threads, + // only affects the number of attempts made to get work from the + // overflow list and does not affect the number of workers. Just + // pass ParallelGCThreads so this behavior is unchanged. if (_collector->par_take_from_overflow_list(num_from_overflow_list, - work_q)) { + work_q, + ParallelGCThreads)) { // found something in global overflow list; // not yet ready to go stealing work from others. // We'd like to assert(work_q->size() != 0, ...) @@ -5367,11 +5423,12 @@ // Merge the per-thread plab arrays into the global survivor chunk // array which will provide the partitioning of the survivor space // for CMS rescan. -void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) { +void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv, + int no_of_gc_threads) { assert(_survivor_plab_array != NULL, "Error"); assert(_survivor_chunk_array != NULL, "Error"); assert(_collectorState == FinalMarking, "Error"); - for (uint j = 0; j < ParallelGCThreads; j++) { + for (int j = 0; j < no_of_gc_threads; j++) { _cursor[j] = 0; } HeapWord* top = surv->top(); @@ -5379,7 +5436,7 @@ for (i = 0; i < _survivor_chunk_capacity; i++) { // all sca entries HeapWord* min_val = top; // Higher than any PLAB address uint min_tid = 0; // position of min_val this round - for (uint j = 0; j < ParallelGCThreads; j++) { + for (int j = 0; j < no_of_gc_threads; j++) { ChunkArray* cur_sca = &_survivor_plab_array[j]; if (_cursor[j] == cur_sca->end()) { continue; @@ -5413,7 +5470,7 @@ // Verify that we used up all the recorded entries #ifdef ASSERT size_t total = 0; - for (uint j = 0; j < ParallelGCThreads; j++) { + for (int j = 0; j < no_of_gc_threads; j++) { assert(_cursor[j] == _survivor_plab_array[j].end(), "Ctl pt invariant"); total += _cursor[j]; } @@ -5448,13 +5505,15 @@ // Each valid entry in [0, _eden_chunk_index) represents a task. size_t n_tasks = _eden_chunk_index + 1; assert(n_tasks == 1 || _eden_chunk_array != NULL, "Error"); - pst->set_par_threads(n_threads); + // Sets the condition for completion of the subtask (how many threads + // need to finish in order to be done). + pst->set_n_threads(n_threads); pst->set_n_tasks((int)n_tasks); } // Merge the survivor plab arrays into _survivor_chunk_array if (_survivor_plab_array != NULL) { - merge_survivor_plab_arrays(dng->from()); + merge_survivor_plab_arrays(dng->from(), n_threads); } else { assert(_survivor_chunk_index == 0, "Error"); } @@ -5463,7 +5522,9 @@ { SequentialSubTasksDone* pst = dng->to()->par_seq_tasks(); assert(!pst->valid(), "Clobbering existing data?"); - pst->set_par_threads(n_threads); + // Sets the condition for completion of the subtask (how many threads + // need to finish in order to be done). + pst->set_n_threads(n_threads); pst->set_n_tasks(1); assert(pst->valid(), "Error"); } @@ -5474,7 +5535,9 @@ assert(!pst->valid(), "Clobbering existing data?"); size_t n_tasks = _survivor_chunk_index + 1; assert(n_tasks == 1 || _survivor_chunk_array != NULL, "Error"); - pst->set_par_threads(n_threads); + // Sets the condition for completion of the subtask (how many threads + // need to finish in order to be done). + pst->set_n_threads(n_threads); pst->set_n_tasks((int)n_tasks); assert(pst->valid(), "Error"); } @@ -5483,7 +5546,7 @@ // Parallel version of remark void CMSCollector::do_remark_parallel() { GenCollectedHeap* gch = GenCollectedHeap::heap(); - WorkGang* workers = gch->workers(); + FlexibleWorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); int n_workers = workers->total_workers(); CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace(); @@ -5636,13 +5699,11 @@ //////////////////////////////////////////////////////// // Parallel Reference Processing Task Proxy Class //////////////////////////////////////////////////////// -class CMSRefProcTaskProxy: public AbstractGangTask { +class CMSRefProcTaskProxy: public AbstractGangTaskWOopQueues { typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; CMSCollector* _collector; CMSBitMap* _mark_bit_map; const MemRegion _span; - OopTaskQueueSet* _task_queues; - ParallelTaskTerminator _term; ProcessTask& _task; public: @@ -5650,24 +5711,21 @@ CMSCollector* collector, const MemRegion& span, CMSBitMap* mark_bit_map, - int total_workers, + AbstractWorkGang* workers, OopTaskQueueSet* task_queues): - AbstractGangTask("Process referents by policy in parallel"), + AbstractGangTaskWOopQueues("Process referents by policy in parallel", + task_queues), _task(task), - _collector(collector), _span(span), _mark_bit_map(mark_bit_map), - _task_queues(task_queues), - _term(total_workers, task_queues) + _collector(collector), _span(span), _mark_bit_map(mark_bit_map) { assert(_collector->_span.equals(_span) && !_span.is_empty(), "Inconsistency in _span"); } - OopTaskQueueSet* task_queues() { return _task_queues; } + OopTaskQueueSet* task_queues() { return queues(); } OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); } - ParallelTaskTerminator* terminator() { return &_term; } - void do_work_steal(int i, CMSParDrainMarkingStackClosure* drain, CMSParKeepAliveClosure* keep_alive, @@ -5739,8 +5797,13 @@ size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4, (size_t)ParGCDesiredObjsFromOverflowList); // Now check if there's any work in the overflow list + // Passing ParallelGCThreads as the third parameter, no_of_gc_threads, + // only affects the number of attempts made to get work from the + // overflow list and does not affect the number of workers. Just + // pass ParallelGCThreads so this behavior is unchanged. if (_collector->par_take_from_overflow_list(num_from_overflow_list, - work_q)) { + work_q, + ParallelGCThreads)) { // Found something in global overflow list; // not yet ready to go stealing work from others. // We'd like to assert(work_q->size() != 0, ...) @@ -5773,13 +5836,12 @@ void CMSRefProcTaskExecutor::execute(ProcessTask& task) { GenCollectedHeap* gch = GenCollectedHeap::heap(); - WorkGang* workers = gch->workers(); + FlexibleWorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); - int n_workers = workers->total_workers(); CMSRefProcTaskProxy rp_task(task, &_collector, _collector.ref_processor()->span(), _collector.markBitMap(), - n_workers, _collector.task_queues()); + workers, _collector.task_queues()); workers->run_task(&rp_task); } @@ -5787,7 +5849,7 @@ { GenCollectedHeap* gch = GenCollectedHeap::heap(); - WorkGang* workers = gch->workers(); + FlexibleWorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); CMSRefEnqueueTaskProxy enq_task(task); workers->run_task(&enq_task); @@ -5814,6 +5876,14 @@ { TraceTime t("weak refs processing", PrintGCDetails, false, gclog_or_tty); if (rp->processing_is_mt()) { + // Set the degree of MT here. If the discovery is done MT, there + // may have been a different number of threads doing the discovery + // and a different number of discovered lists may have Ref objects. + // That is OK as long as the Reference lists are balanced (see + // balance_all_queues() and balance_queues()). + + + rp->set_mt_degree(ParallelGCThreads); CMSRefProcTaskExecutor task_executor(*this); rp->process_discovered_references(&_is_alive_closure, &cmsKeepAliveClosure, @@ -5874,6 +5944,7 @@ rp->set_enqueuing_is_done(true); if (rp->processing_is_mt()) { + rp->balance_all_queues(); CMSRefProcTaskExecutor task_executor(*this); rp->enqueue_discovered_references(&task_executor); } else { @@ -8708,7 +8779,8 @@ // similar changes might be needed. // CR 6797058 has been filed to consolidate the common code. bool CMSCollector::par_take_from_overflow_list(size_t num, - OopTaskQueue* work_q) { + OopTaskQueue* work_q, + int no_of_gc_threads) { assert(work_q->size() == 0, "First empty local work queue"); assert(num < work_q->max_elems(), "Can't bite more than we can chew"); if (_overflow_list == NULL) { @@ -8717,7 +8789,9 @@ // Grab the entire list; we'll put back a suffix oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); Thread* tid = Thread::current(); - size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads; + // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was + // set to ParallelGCThreads. + size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads; size_t sleep_time_millis = MAX2((size_t)1, num/100); // If the list is busy, we spin for a short while, // sleeping between attempts to get the list. @@ -8867,23 +8941,10 @@ // failures where possible, thus, incrementally hardening the VM // in such low resource situations. void CMSCollector::preserve_mark_work(oop p, markOop m) { - if (_preserved_oop_stack == NULL) { - assert(_preserved_mark_stack == NULL, - "bijection with preserved_oop_stack"); - // Allocate the stacks - _preserved_oop_stack = new (ResourceObj::C_HEAP) - GrowableArray(PreserveMarkStackSize, true); - _preserved_mark_stack = new (ResourceObj::C_HEAP) - GrowableArray(PreserveMarkStackSize, true); - if (_preserved_oop_stack == NULL || _preserved_mark_stack == NULL) { - vm_exit_out_of_memory(2* PreserveMarkStackSize * sizeof(oop) /* punt */, - "Preserved Mark/Oop Stack for CMS (C-heap)"); - } - } - _preserved_oop_stack->push(p); - _preserved_mark_stack->push(m); + _preserved_oop_stack.push(p); + _preserved_mark_stack.push(m); assert(m == p->mark(), "Mark word changed"); - assert(_preserved_oop_stack->length() == _preserved_mark_stack->length(), + assert(_preserved_oop_stack.size() == _preserved_mark_stack.size(), "bijection"); } @@ -8925,42 +8986,30 @@ // effect on performance so great that this will // likely just be in the noise anyway. void CMSCollector::restore_preserved_marks_if_any() { - if (_preserved_oop_stack == NULL) { - assert(_preserved_mark_stack == NULL, - "bijection with preserved_oop_stack"); - return; - } - assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped"); assert(Thread::current()->is_ConcurrentGC_thread() || Thread::current()->is_VM_thread(), "should be single-threaded"); - - int length = _preserved_oop_stack->length(); - assert(_preserved_mark_stack->length() == length, "bijection"); - for (int i = 0; i < length; i++) { - oop p = _preserved_oop_stack->at(i); + assert(_preserved_oop_stack.size() == _preserved_mark_stack.size(), + "bijection"); + + while (!_preserved_oop_stack.is_empty()) { + oop p = _preserved_oop_stack.pop(); assert(p->is_oop(), "Should be an oop"); assert(_span.contains(p), "oop should be in _span"); assert(p->mark() == markOopDesc::prototype(), "Set when taken from overflow list"); - markOop m = _preserved_mark_stack->at(i); + markOop m = _preserved_mark_stack.pop(); p->set_mark(m); } - _preserved_mark_stack->clear(); - _preserved_oop_stack->clear(); - assert(_preserved_mark_stack->is_empty() && - _preserved_oop_stack->is_empty(), + assert(_preserved_mark_stack.is_empty() && _preserved_oop_stack.is_empty(), "stacks were cleared above"); } #ifndef PRODUCT bool CMSCollector::no_preserved_marks() const { - return ( ( _preserved_mark_stack == NULL - && _preserved_oop_stack == NULL) - || ( _preserved_mark_stack->is_empty() - && _preserved_oop_stack->is_empty())); + return _preserved_mark_stack.is_empty() && _preserved_oop_stack.is_empty(); } #endif @@ -9256,4 +9305,3 @@ true /* recordGCEndTime */, true /* countCollection */ ); } - diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -537,8 +537,8 @@ // The following array-pair keeps track of mark words // displaced for accomodating overflow list above. // This code will likely be revisited under RFE#4922830. - GrowableArray* _preserved_oop_stack; - GrowableArray* _preserved_mark_stack; + Stack _preserved_oop_stack; + Stack _preserved_mark_stack; int* _hash_seed; @@ -729,7 +729,9 @@ // Support for marking stack overflow handling bool take_from_overflow_list(size_t num, CMSMarkStack* to_stack); - bool par_take_from_overflow_list(size_t num, OopTaskQueue* to_work_q); + bool par_take_from_overflow_list(size_t num, + OopTaskQueue* to_work_q, + int no_of_gc_threads); void push_on_overflow_list(oop p); void par_push_on_overflow_list(oop p); // the following is, obviously, not, in general, "MT-stable" @@ -768,7 +770,7 @@ void abortable_preclean(); // Preclean while looking for possible abort void initialize_sequential_subtasks_for_young_gen_rescan(int i); // Helper function for above; merge-sorts the per-thread plab samples - void merge_survivor_plab_arrays(ContiguousSpace* surv); + void merge_survivor_plab_arrays(ContiguousSpace* surv, int no_of_gc_threads); // Resets (i.e. clears) the per-thread plab sample vectors void reset_survivor_plab_arrays(); diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -123,24 +123,44 @@ // or given timeout, whichever is earlier. void wait_on_cms_lock(long t); // milliseconds - // The CMS thread will yield during the work portion of it's cycle + // The CMS thread will yield during the work portion of its cycle // only when requested to. Both synchronous and asychronous requests - // are provided. A synchronous request is used for young gen - // collections and direct allocations. The requesting thread increments - // pending_yields at the beginning of an operation, and decrements it when - // the operation is completed. The CMS thread yields when pending_yields - // is positive. An asynchronous request is used by iCMS in the stop_icms() - // operation. A single yield satisfies the outstanding asynch yield requests. - // The requesting thread increments both pending_yields and pending_decrements. - // After yielding, the CMS thread decrements both by the amount in - // pending_decrements. + // are provided: + // (1) A synchronous request is used for young gen collections and + // for direct allocations. The requesting thread increments + // _pending_yields at the beginning of an operation, and decrements + // _pending_yields when that operation is completed. + // In turn, the CMS thread yields when _pending_yields is positive, + // and continues to yield until the value reverts to 0. + // (2) An asynchronous request, on the other hand, is used by iCMS + // for the stop_icms() operation. A single yield satisfies all of + // the outstanding asynch yield requests, of which there may + // occasionally be several in close succession. To accomplish + // this, an asynch-requesting thread atomically increments both + // _pending_yields and _pending_decrements. An asynchr requesting + // thread does not wait and "acknowledge" completion of an operation + // and deregister the request, like the synchronous version described + // above does. In turn, after yielding, the CMS thread decrements both + // _pending_yields and _pending_decrements by the value seen in + // _pending_decrements before the decrement. + // NOTE: The above scheme is isomorphic to having two request counters, + // one for async requests and one for sync requests, and for the CMS thread + // to check the sum of the two counters to decide whether it should yield + // and to clear only the async counter when it yields. However, it turns out + // to be more efficient for CMS code to just check a single counter + // _pending_yields that holds the sum (of both sync and async requests), and + // a second counter _pending_decrements that only holds the async requests, + // for greater efficiency, since in a typical CMS run, there are many more + // pontential (i.e. static) yield points than there are actual + // (i.e. dynamic) yields because of requests, which are few and far between. + // // Note that, while "_pending_yields >= _pending_decrements" is an invariant, // we cannot easily test that invariant, since the counters are manipulated via // atomic instructions without explicit locking and we cannot read // the two counters atomically together: one suggestion is to // use (for example) 16-bit counters so as to be able to read the // two counters atomically even on 32-bit platforms. Notice that - // the second assert in acknowledge_yield_request() does indeed + // the second assert in acknowledge_yield_request() below does indeed // check a form of the above invariant, albeit indirectly. static void increment_pending_yields() { @@ -152,6 +172,7 @@ assert(_pending_yields >= 0, "can't be negative"); } static void asynchronous_yield_request() { + assert(CMSIncrementalMode, "Currently only used w/iCMS"); increment_pending_yields(); Atomic::inc(&_pending_decrements); assert(_pending_decrements >= 0, "can't be negative"); @@ -159,6 +180,7 @@ static void acknowledge_yield_request() { jint decrement = _pending_decrements; if (decrement > 0) { + assert(CMSIncrementalMode, "Currently only used w/iCMS"); // Order important to preserve: _pending_yields >= _pending_decrements Atomic::add(-decrement, &_pending_decrements); Atomic::add(-decrement, &_pending_yields); @@ -195,7 +217,7 @@ } } -// For scoped increment/decrement of yield requests +// For scoped increment/decrement of (synchronous) yield requests class CMSSynchronousYieldRequest: public StackObj { public: CMSSynchronousYieldRequest() { diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/g1/concurrentMark.cpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -278,15 +278,16 @@ if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base); } -void CMRegionStack::push(MemRegion mr) { +void CMRegionStack::push_lock_free(MemRegion mr) { assert(mr.word_size() > 0, "Precondition"); while (true) { - if (isFull()) { + jint index = _index; + + if (index >= _capacity) { _overflow = true; return; } // Otherwise... - jint index = _index; jint next_index = index+1; jint res = Atomic::cmpxchg(next_index, &_index, index); if (res == index) { @@ -297,19 +298,17 @@ } } -// Currently we do not call this at all. Normally we would call it -// during the concurrent marking / remark phases but we now call -// the lock-based version instead. But we might want to resurrect this -// code in the future. So, we'll leave it here commented out. -#if 0 -MemRegion CMRegionStack::pop() { +// Lock-free pop of the region stack. Called during the concurrent +// marking / remark phases. Should only be called in tandem with +// other lock-free pops. +MemRegion CMRegionStack::pop_lock_free() { while (true) { - // Otherwise... jint index = _index; if (index == 0) { return MemRegion(); } + // Otherwise... jint next_index = index-1; jint res = Atomic::cmpxchg(next_index, &_index, index); if (res == index) { @@ -326,7 +325,11 @@ // Otherwise, we need to try again. } } -#endif // 0 + +#if 0 +// The routines that manipulate the region stack with a lock are +// not currently used. They should be retained, however, as a +// diagnostic aid. void CMRegionStack::push_with_lock(MemRegion mr) { assert(mr.word_size() > 0, "Precondition"); @@ -361,6 +364,7 @@ } } } +#endif bool CMRegionStack::invalidate_entries_into_cset() { bool result = false; @@ -583,10 +587,13 @@ #endif guarantee(parallel_marking_threads() > 0, "peace of mind"); - _parallel_workers = new WorkGang("G1 Parallel Marking Threads", - (int) parallel_marking_threads(), false, true); - if (_parallel_workers == NULL) + _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads", + (int) _parallel_marking_threads, false, true); + if (_parallel_workers == NULL) { vm_exit_during_initialization("Failed necessary allocation."); + } else { + _parallel_workers->initialize_workers(); + } } // so that the call below can read a sensible value @@ -645,8 +652,9 @@ // We do reset all of them, since different phases will use // different number of active threads. So, it's easiest to have all // of them ready. - for (int i = 0; i < (int) _max_task_num; ++i) + for (int i = 0; i < (int) _max_task_num; ++i) { _tasks[i]->reset(_nextMarkBitMap); + } // we need this to make sure that the flag is on during the evac // pause with initial mark piggy-backed @@ -985,7 +993,7 @@ "below the finger, pushing it", mr.start(), mr.end()); - if (!region_stack_push(mr)) { + if (!region_stack_push_lock_free(mr)) { if (verbose_low()) gclog_or_tty->print_cr("[global] region stack has overflown."); } @@ -1451,7 +1459,7 @@ _bm, _g1h->concurrent_mark(), _region_bm, _card_bm); calccl.no_yield(); - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { _g1h->heap_region_par_iterate_chunked(&calccl, i, HeapRegion::FinalCountClaimValue); } else { @@ -1531,7 +1539,7 @@ G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &_par_cleanup_thread_state[i]->list, i); - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { _g1h->heap_region_par_iterate_chunked(&g1_note_end, i, HeapRegion::NoteEndClaimValue); } else { @@ -1575,7 +1583,7 @@ {} void work(int i) { - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { _g1rs->scrub_par(_region_bm, _card_bm, i, HeapRegion::ScrubRemSetClaimValue); } else { @@ -1647,7 +1655,7 @@ // Do counting once more with the world stopped for good measure. G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), &_region_bm, &_card_bm); - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { assert(g1h->check_heap_region_claim_values( HeapRegion::InitialClaimValue), "sanity check"); @@ -1695,7 +1703,7 @@ // Note end of marking in all heap regions. double note_end_start = os::elapsedTime(); G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state); - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { int n_workers = g1h->workers()->total_workers(); g1h->set_par_threads(n_workers); g1h->workers()->run_task(&g1_par_note_end_task); @@ -1720,7 +1728,7 @@ if (G1ScrubRemSets) { double rs_scrub_start = os::elapsedTime(); G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { int n_workers = g1h->workers()->total_workers(); g1h->set_par_threads(n_workers); g1h->workers()->run_task(&g1_par_scrub_rs_task); @@ -1934,7 +1942,7 @@ g1h->ensure_parsability(false); - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { G1CollectedHeap::StrongRootsScope srs(g1h); // this is remark, so we'll use up all available threads int active_workers = ParallelGCThreads; @@ -2330,6 +2338,39 @@ return NULL; } +bool ConcurrentMark::invalidate_aborted_regions_in_cset() { + bool result = false; + for (int i = 0; i < (int)_max_task_num; ++i) { + CMTask* the_task = _tasks[i]; + MemRegion mr = the_task->aborted_region(); + if (mr.start() != NULL) { + assert(mr.end() != NULL, "invariant"); + assert(mr.word_size() > 0, "invariant"); + HeapRegion* hr = _g1h->heap_region_containing(mr.start()); + assert(hr != NULL, "invariant"); + if (hr->in_collection_set()) { + // The region points into the collection set + the_task->set_aborted_region(MemRegion()); + result = true; + } + } + } + return result; +} + +bool ConcurrentMark::has_aborted_regions() { + for (int i = 0; i < (int)_max_task_num; ++i) { + CMTask* the_task = _tasks[i]; + MemRegion mr = the_task->aborted_region(); + if (mr.start() != NULL) { + assert(mr.end() != NULL, "invariant"); + assert(mr.word_size() > 0, "invariant"); + return true; + } + } + return false; +} + void ConcurrentMark::oops_do(OopClosure* cl) { if (_markStack.size() > 0 && verbose_low()) gclog_or_tty->print_cr("[global] scanning the global marking stack, " @@ -2348,13 +2389,22 @@ queue->oops_do(cl); } - // finally, invalidate any entries that in the region stack that + // Invalidate any entries, that are in the region stack, that // point into the collection set if (_regionStack.invalidate_entries_into_cset()) { // otherwise, any gray objects copied during the evacuation pause // might not be visited. assert(_should_gray_objects, "invariant"); } + + // Invalidate any aborted regions, recorded in the individual CM + // tasks, that point into the collection set. + if (invalidate_aborted_regions_in_cset()) { + // otherwise, any gray objects copied during the evacuation pause + // might not be visited. + assert(_should_gray_objects, "invariant"); + } + } void ConcurrentMark::clear_marking_state() { @@ -2635,7 +2685,7 @@ // irrespective whether all collection set regions are below the // finger, if the region stack is not empty. This is expected to be // a rare case, so I don't think it's necessary to be smarted about it. - if (!region_stack_empty()) + if (!region_stack_empty() || has_aborted_regions()) _should_gray_objects = true; } @@ -2654,8 +2704,10 @@ _nextMarkBitMap->clearAll(); // Empty mark stack clear_marking_state(); - for (int i = 0; i < (int)_max_task_num; ++i) + for (int i = 0; i < (int)_max_task_num; ++i) { _tasks[i]->clear_region_fields(); + _tasks[i]->clear_aborted_region(); + } _has_aborted = true; SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); @@ -2933,6 +2985,7 @@ _nextMarkBitMap = nextMarkBitMap; clear_region_fields(); + clear_aborted_region(); _calls = 0; _elapsed_time_ms = 0.0; @@ -3369,14 +3422,14 @@ CMObjectClosure oc(this); SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); - if (ParallelGCThreads > 0) + if (G1CollectedHeap::use_parallel_gc_threads()) satb_mq_set.set_par_closure(_task_id, &oc); else satb_mq_set.set_closure(&oc); // This keeps claiming and applying the closure to completed buffers // until we run out of buffers or we need to abort. - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { while (!has_aborted() && satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { if (_cm->verbose_medium()) @@ -3396,7 +3449,7 @@ if (!concurrent() && !has_aborted()) { // We should only do this during remark. - if (ParallelGCThreads > 0) + if (G1CollectedHeap::use_parallel_gc_threads()) satb_mq_set.par_iterate_closure_all_threads(_task_id); else satb_mq_set.iterate_closure_all_threads(); @@ -3408,7 +3461,7 @@ concurrent() || satb_mq_set.completed_buffers_num() == 0, "invariant"); - if (ParallelGCThreads > 0) + if (G1CollectedHeap::use_parallel_gc_threads()) satb_mq_set.set_par_closure(_task_id, NULL); else satb_mq_set.set_closure(NULL); @@ -3425,20 +3478,32 @@ assert(_region_finger == NULL, "it should be NULL when we're not scanning a region"); - if (!_cm->region_stack_empty()) { + if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) { if (_cm->verbose_low()) gclog_or_tty->print_cr("[%d] draining region stack, size = %d", _task_id, _cm->region_stack_size()); - MemRegion mr = _cm->region_stack_pop_with_lock(); - // it returns MemRegion() if the pop fails - statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); + MemRegion mr; + + if (!_aborted_region.is_empty()) { + mr = _aborted_region; + _aborted_region = MemRegion(); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )", + _task_id, mr.start(), mr.end()); + } else { + mr = _cm->region_stack_pop_lock_free(); + // it returns MemRegion() if the pop fails + statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); + } while (mr.start() != NULL) { if (_cm->verbose_medium()) gclog_or_tty->print_cr("[%d] we are scanning region " "["PTR_FORMAT", "PTR_FORMAT")", _task_id, mr.start(), mr.end()); + assert(mr.end() <= _cm->finger(), "otherwise the region shouldn't be on the stack"); assert(!mr.is_empty(), "Only non-empty regions live on the region stack"); @@ -3451,7 +3516,7 @@ if (has_aborted()) mr = MemRegion(); else { - mr = _cm->region_stack_pop_with_lock(); + mr = _cm->region_stack_pop_lock_free(); // it returns MemRegion() if the pop fails statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); } @@ -3465,6 +3530,10 @@ // have definitely set _region_finger to something non-null. assert(_region_finger != NULL, "invariant"); + // Make sure that any previously aborted region has been + // cleared. + assert(_aborted_region.is_empty(), "aborted region not cleared"); + // The iteration was actually aborted. So now _region_finger // points to the address of the object we last scanned. If we // leave it there, when we restart this task, we will rescan @@ -3477,14 +3546,14 @@ if (!newRegion.is_empty()) { if (_cm->verbose_low()) { - gclog_or_tty->print_cr("[%d] pushing unscanned region" - "[" PTR_FORMAT "," PTR_FORMAT ") on region stack", + gclog_or_tty->print_cr("[%d] recording unscanned region" + "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask", _task_id, newRegion.start(), newRegion.end()); } - // Now push the part of the region we didn't scan on the - // region stack to make sure a task scans it later. - _cm->region_stack_push_with_lock(newRegion); + // Now record the part of the region we didn't scan to + // make sure this task scans it later. + _aborted_region = newRegion; } // break from while mr = MemRegion(); @@ -3654,6 +3723,8 @@ assert(concurrent() || _cm->region_stack_empty(), "the region stack should have been cleared before remark"); + assert(concurrent() || !_cm->has_aborted_regions(), + "aborted regions should have been cleared before remark"); assert(_region_finger == NULL, "this should be non-null only when a region is being scanned"); @@ -3943,6 +4014,7 @@ // that, if a condition is false, we can immediately find out // which one. guarantee(_cm->out_of_regions(), "only way to reach here"); + guarantee(_aborted_region.is_empty(), "only way to reach here"); guarantee(_cm->region_stack_empty(), "only way to reach here"); guarantee(_cm->mark_stack_empty(), "only way to reach here"); guarantee(_task_queue->size() == 0, "only way to reach here"); @@ -4042,7 +4114,8 @@ _nextMarkBitMap(NULL), _hash_seed(17), _task_queue(task_queue), _task_queues(task_queues), - _oop_closure(NULL) { + _oop_closure(NULL), + _aborted_region(MemRegion()) { guarantee(task_queue != NULL, "invariant"); guarantee(task_queues != NULL, "invariant"); diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/g1/concurrentMark.hpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -250,21 +250,23 @@ // This is lock-free; assumes that it will only be called in parallel // with other "push" operations (no pops). - void push(MemRegion mr); - -#if 0 - // This is currently not used. See the comment in the .cpp file. + void push_lock_free(MemRegion mr); // Lock-free; assumes that it will only be called in parallel // with other "pop" operations (no pushes). - MemRegion pop(); -#endif // 0 + MemRegion pop_lock_free(); + +#if 0 + // The routines that manipulate the region stack with a lock are + // not currently used. They should be retained, however, as a + // diagnostic aid. // These two are the implementations that use a lock. They can be // called concurrently with each other but they should not be called // concurrently with the lock-free versions (push() / pop()). void push_with_lock(MemRegion mr); MemRegion pop_with_lock(); +#endif bool isEmpty() { return _index == 0; } bool isFull() { return _index == _capacity; } @@ -398,6 +400,7 @@ volatile bool _concurrent; // set at the end of a Full GC so that marking aborts volatile bool _has_aborted; + // used when remark aborts due to an overflow to indicate that // another concurrent marking phase should start volatile bool _restart_for_overflow; @@ -548,23 +551,30 @@ bool mark_stack_overflow() { return _markStack.overflow(); } bool mark_stack_empty() { return _markStack.isEmpty(); } - // Manipulation of the region stack - bool region_stack_push(MemRegion mr) { + // (Lock-free) Manipulation of the region stack + bool region_stack_push_lock_free(MemRegion mr) { // Currently we only call the lock-free version during evacuation // pauses. assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped"); - _regionStack.push(mr); + _regionStack.push_lock_free(mr); if (_regionStack.overflow()) { set_has_overflown(); return false; } return true; } + + // Lock-free version of region-stack pop. Should only be + // called in tandem with other lock-free pops. + MemRegion region_stack_pop_lock_free() { + return _regionStack.pop_lock_free(); + } + #if 0 - // Currently this is not used. See the comment in the .cpp file. - MemRegion region_stack_pop() { return _regionStack.pop(); } -#endif // 0 + // The routines that manipulate the region stack with a lock are + // not currently used. They should be retained, however, as a + // diagnostic aid. bool region_stack_push_with_lock(MemRegion mr) { // Currently we only call the lock-based version during either @@ -579,6 +589,7 @@ } return true; } + MemRegion region_stack_pop_with_lock() { // Currently we only call the lock-based version during either // concurrent marking or remark. @@ -587,11 +598,21 @@ return _regionStack.pop_with_lock(); } +#endif int region_stack_size() { return _regionStack.size(); } bool region_stack_overflow() { return _regionStack.overflow(); } bool region_stack_empty() { return _regionStack.isEmpty(); } + // Iterate over any regions that were aborted while draining the + // region stack (any such regions are saved in the corresponding + // CMTask) and invalidate (i.e. assign to the empty MemRegion()) + // any regions that point into the collection set. + bool invalidate_aborted_regions_in_cset(); + + // Returns true if there are any aborted memory regions. + bool has_aborted_regions(); + bool concurrent_marking_in_progress() { return _concurrent_marking_in_progress; } @@ -856,6 +877,15 @@ // stack. HeapWord* _region_finger; + // If we abort while scanning a region we record the remaining + // unscanned portion and check this field when marking restarts. + // This avoids having to push on the region stack while other + // marking threads may still be popping regions. + // If we were to push the unscanned portion directly to the + // region stack then we would need to using locking versions + // of the push and pop operations. + MemRegion _aborted_region; + // the number of words this task has scanned size_t _words_scanned; // When _words_scanned reaches this limit, the regular clock is @@ -1012,6 +1042,15 @@ void clear_has_aborted() { _has_aborted = false; } bool claimed() { return _claimed; } + // Support routines for the partially scanned region that may be + // recorded as a result of aborting while draining the CMRegionStack + MemRegion aborted_region() { return _aborted_region; } + void set_aborted_region(MemRegion mr) + { _aborted_region = mr; } + + // Clears any recorded partially scanned region + void clear_aborted_region() { set_aborted_region(MemRegion()); } + void set_oop_closure(OopClosure* oop_closure) { _oop_closure = oop_closure; } diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp --- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -303,9 +303,10 @@ } void ConcurrentMarkThread::sleepBeforeNextCycle() { - clear_in_progress(); // We join here because we don't want to do the "shouldConcurrentMark()" // below while the world is otherwise stopped. + assert(!in_progress(), "should have been cleared"); + MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); while (!started()) { CGC_lock->wait(Mutex::_no_safepoint_check_flag); diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp --- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -69,12 +69,12 @@ ConcurrentMark* cm() { return _cm; } - void set_started() { _started = true; } - void clear_started() { _started = false; } + void set_started() { assert(!_in_progress, "cycle in progress"); _started = true; } + void clear_started() { assert(_in_progress, "must be starting a cycle"); _started = false; } bool started() { return _started; } - void set_in_progress() { _in_progress = true; } - void clear_in_progress() { _in_progress = false; } + void set_in_progress() { assert(_started, "must be starting a cycle"); _in_progress = true; } + void clear_in_progress() { assert(!_started, "must not be starting a new cycle"); _in_progress = false; } bool in_progress() { return _in_progress; } // This flag returns true from the moment a marking cycle is diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -37,11 +37,10 @@ class DirtyCardQueue: public PtrQueue { public: DirtyCardQueue(PtrQueueSet* qset_, bool perm = false) : - PtrQueue(qset_, perm) - { - // Dirty card queues are always active. - _active = true; - } + // Dirty card queues are always active, so we create them with their + // active field set to true. + PtrQueue(qset_, perm, true /* active */) { } + // Apply the closure to all elements, and reset the index to make the // buffer empty. If a closure application returns "false", return // "false" immediately, halting the iteration. If "consume" is true, diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -961,7 +961,8 @@ } // Rebuild remembered sets of all regions. - if (ParallelGCThreads > 0) { + + if (G1CollectedHeap::use_parallel_gc_threads()) { ParRebuildRSTask rebuild_rs_task(this); assert(check_heap_region_claim_values( HeapRegion::InitialClaimValue), "sanity check"); @@ -1784,6 +1785,14 @@ _full_collections_completed += 1; + // We need to clear the "in_progress" flag in the CM thread before + // we wake up any waiters (especially when ExplicitInvokesConcurrent + // is set) so that if a waiter requests another System.gc() it doesn't + // incorrectly see that a marking cyle is still in progress. + if (outer) { + _cmThread->clear_in_progress(); + } + // This notify_all() will ensure that a thread that called // System.gc() with (with ExplicitGCInvokesConcurrent set or not) // and it's waiting for a full GC to finish will be woken up. It is @@ -1960,7 +1969,7 @@ int worker, jint claim_value) { const size_t regions = n_regions(); - const size_t worker_num = (ParallelGCThreads > 0 ? ParallelGCThreads : 1); + const size_t worker_num = (G1CollectedHeap::use_parallel_gc_threads() ? ParallelGCThreads : 1); // try to spread out the starting points of the workers const size_t start_index = regions / worker_num * (size_t) worker; @@ -2527,7 +2536,7 @@ } void G1CollectedHeap::print_gc_threads_on(outputStream* st) const { - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { workers()->print_worker_threads_on(st); } @@ -2543,7 +2552,7 @@ } void G1CollectedHeap::gc_threads_do(ThreadClosure* tc) const { - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { workers()->threads_do(tc); } tc->do_thread(_cmThread); @@ -3083,7 +3092,7 @@ if (r != NULL) { r_used = r->used(); - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { // need to take the lock to guard against two threads calling // get_gc_alloc_region concurrently (very unlikely but...) MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); @@ -4182,6 +4191,8 @@ // *** Common G1 Evacuation Stuff +// This method is run in a GC worker. + void G1CollectedHeap:: g1_process_strong_roots(bool collecting_perm_gen, @@ -4259,7 +4270,7 @@ }; void G1CollectedHeap::save_marks() { - if (ParallelGCThreads == 0) { + if (!CollectedHeap::use_parallel_gc_threads()) { SaveMarksClosure sm; heap_region_iterate(&sm); } @@ -4284,7 +4295,7 @@ assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty"); double start_par = os::elapsedTime(); - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { // The individual threads will set their evac-failure closures. StrongRootsScope srs(this); if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr(); diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -656,6 +656,9 @@ bool _unclean_regions_coming; public: + + SubTasksDone* process_strong_tasks() { return _process_strong_tasks; } + void set_refine_cte_cl_concurrency(bool concurrent); RefToScanQueue *task_queue(int i) const; @@ -684,7 +687,7 @@ void set_par_threads(int t) { SharedHeap::set_par_threads(t); - _process_strong_tasks->set_par_threads(t); + _process_strong_tasks->set_n_threads(t); } virtual CollectedHeap::Name kind() const { @@ -1688,8 +1691,8 @@ ref = new_ref; } - int refs_to_scan() { return refs()->size(); } - int overflowed_refs_to_scan() { return refs()->overflow_stack()->length(); } + int refs_to_scan() { return (int)refs()->size(); } + int overflowed_refs_to_scan() { return (int)refs()->overflow_stack()->size(); } template void update_rs(HeapRegion* from, T* p, int tid) { if (G1DeferredRSUpdate) { diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -72,7 +72,10 @@ // G1CollectorPolicy::G1CollectorPolicy() : - _parallel_gc_threads((ParallelGCThreads > 0) ? ParallelGCThreads : 1), + _parallel_gc_threads(G1CollectedHeap::use_parallel_gc_threads() + ? ParallelGCThreads : 1), + + _n_pauses(0), _recent_CH_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), _recent_G1_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), @@ -1073,7 +1076,7 @@ } double G1CollectorPolicy::avg_value (double* data) { - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { double ret = 0.0; for (uint i = 0; i < ParallelGCThreads; ++i) ret += data[i]; @@ -1084,7 +1087,7 @@ } double G1CollectorPolicy::max_value (double* data) { - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { double ret = data[0]; for (uint i = 1; i < ParallelGCThreads; ++i) if (data[i] > ret) @@ -1096,7 +1099,7 @@ } double G1CollectorPolicy::sum_of_values (double* data) { - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { double sum = 0.0; for (uint i = 0; i < ParallelGCThreads; i++) sum += data[i]; @@ -1110,7 +1113,7 @@ double* data2) { double ret = data1[0] + data2[0]; - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { for (uint i = 1; i < ParallelGCThreads; ++i) { double data = data1[i] + data2[i]; if (data > ret) @@ -1126,7 +1129,7 @@ void G1CollectorPolicy::record_collection_pause_end() { double end_time_sec = os::elapsedTime(); double elapsed_ms = _last_pause_time_ms; - bool parallel = ParallelGCThreads > 0; + bool parallel = G1CollectedHeap::use_parallel_gc_threads(); double evac_ms = (end_time_sec - _cur_G1_strong_roots_end_sec) * 1000.0; size_t rs_size = _cur_collection_pause_used_regions_at_start - collection_set_size(); @@ -1941,7 +1944,7 @@ // Further, we're now always doing parallel collection. But I'm still // leaving this here as a placeholder for a more precise assertion later. // (DLD, 10/05.) - assert((true || ParallelGCThreads > 0) || + assert((true || G1CollectedHeap::use_parallel_gc_threads()) || _g1->evacuation_failed() || recent_survival_rate <= 1.0, "Or bad frac"); return recent_survival_rate; @@ -1961,7 +1964,7 @@ // Further, we're now always doing parallel collection. But I'm still // leaving this here as a placeholder for a more precise assertion later. // (DLD, 10/05.) - assert((true || ParallelGCThreads > 0) || + assert((true || G1CollectedHeap::use_parallel_gc_threads()) || last_survival_rate <= 1.0, "Or bad frac"); return last_survival_rate; } else { @@ -2121,7 +2124,7 @@ } void G1CollectorPolicy::print_summary(PauseSummary* summary) const { - bool parallel = ParallelGCThreads > 0; + bool parallel = G1CollectedHeap::use_parallel_gc_threads(); MainBodySummary* body_summary = summary->main_body_summary(); if (summary->get_total_seq()->num() > 0) { print_summary_sd(0, "Evacuation Pauses", summary->get_total_seq()); @@ -2559,7 +2562,7 @@ gclog_or_tty->print_cr(" clear marked regions + work1: %8.3f ms.", (clear_marked_end - start)*1000.0); } - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { const size_t OverpartitionFactor = 4; const size_t MinWorkUnit = 8; const size_t WorkUnit = diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/g1/g1MarkSweep.cpp --- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -101,22 +101,6 @@ GenMarkSweep::_preserved_count_max = 0; GenMarkSweep::_preserved_marks = NULL; GenMarkSweep::_preserved_count = 0; - GenMarkSweep::_preserved_mark_stack = NULL; - GenMarkSweep::_preserved_oop_stack = NULL; - - GenMarkSweep::_marking_stack = - new (ResourceObj::C_HEAP) GrowableArray(4000, true); - GenMarkSweep::_objarray_stack = - new (ResourceObj::C_HEAP) GrowableArray(50, true); - - int size = SystemDictionary::number_of_classes() * 2; - GenMarkSweep::_revisit_klass_stack = - new (ResourceObj::C_HEAP) GrowableArray(size, true); - // (#klass/k)^2 for k ~ 10 appears a better fit, but this will have to do - // for now until we have a chance to work out a more optimal setting. - GenMarkSweep::_revisit_mdo_stack = - new (ResourceObj::C_HEAP) GrowableArray(size*2, true); - } void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading, @@ -145,7 +129,7 @@ // Follow system dictionary roots and unload classes bool purged_class = SystemDictionary::do_unloading(&GenMarkSweep::is_alive); - assert(GenMarkSweep::_marking_stack->is_empty(), + assert(GenMarkSweep::_marking_stack.is_empty(), "stack should be empty by now"); // Follow code cache roots (has to be done after system dictionary, @@ -157,19 +141,19 @@ // Update subklass/sibling/implementor links of live klasses GenMarkSweep::follow_weak_klass_links(); - assert(GenMarkSweep::_marking_stack->is_empty(), + assert(GenMarkSweep::_marking_stack.is_empty(), "stack should be empty by now"); // Visit memoized MDO's and clear any unmarked weak refs GenMarkSweep::follow_mdo_weak_refs(); - assert(GenMarkSweep::_marking_stack->is_empty(), "just drained"); + assert(GenMarkSweep::_marking_stack.is_empty(), "just drained"); // Visit symbol and interned string tables and delete unmarked oops SymbolTable::unlink(&GenMarkSweep::is_alive); StringTable::unlink(&GenMarkSweep::is_alive); - assert(GenMarkSweep::_marking_stack->is_empty(), + assert(GenMarkSweep::_marking_stack.is_empty(), "stack should be empty by now"); } diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/g1/g1RemSet.cpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -523,7 +523,7 @@ assert(!_traversal_in_progress, "Invariant between iterations."); set_traversal(true); if (ParallelGCThreads > 0) { - _seq_task->set_par_threads((int)n_workers()); + _seq_task->set_n_threads((int)n_workers()); } guarantee( _cards_scanned == NULL, "invariant" ); _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers()); diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/g1/ptrQueue.hpp --- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -89,6 +89,10 @@ return _buf == NULL ? 0 : _sz - _index; } + bool is_empty() { + return _buf == NULL || _sz == _index; + } + // Set the "active" property of the queue to "b". An enqueue to an // inactive thread is a no-op. Setting a queue to inactive resets its // log to the empty state. diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/g1/satbQueue.hpp --- a/src/share/vm/gc_implementation/g1/satbQueue.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -29,7 +29,12 @@ class ObjPtrQueue: public PtrQueue { public: ObjPtrQueue(PtrQueueSet* qset_, bool perm = false) : - PtrQueue(qset_, perm, qset_->is_active()) { } + // SATB queues are only active during marking cycles. We create + // them with their active field set to false. If a thread is + // created during a cycle and its SATB queue needs to be activated + // before the thread starts running, we'll need to set its active + // field to true. This is done in JavaThread::initialize_queues(). + PtrQueue(qset_, perm, false /* active */) { } // Apply the closure to all elements, and reset the index to make the // buffer empty. void apply_closure(ObjectClosure* cl); diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep --- a/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep Fri Oct 08 09:29:09 2010 -0700 @@ -171,6 +171,7 @@ concurrentMarkSweepGeneration.hpp generationCounters.hpp concurrentMarkSweepGeneration.hpp memoryService.hpp concurrentMarkSweepGeneration.hpp mutexLocker.hpp +concurrentMarkSweepGeneration.hpp stack.inline.hpp concurrentMarkSweepGeneration.hpp taskqueue.hpp concurrentMarkSweepGeneration.hpp virtualspace.hpp concurrentMarkSweepGeneration.hpp yieldingWorkgroup.hpp diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/includeDB_gc_parallelScavenge --- a/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge Fri Oct 08 09:29:09 2010 -0700 @@ -187,9 +187,11 @@ psCompactionManager.cpp psParallelCompact.hpp psCompactionManager.cpp psCompactionManager.hpp psCompactionManager.cpp psOldGen.hpp +psCompactionManager.cpp stack.inline.hpp psCompactionManager.cpp systemDictionary.hpp psCompactionManager.hpp allocation.hpp +psCompactionManager.hpp stack.hpp psCompactionManager.hpp taskqueue.hpp psCompactionManager.inline.hpp psCompactionManager.hpp @@ -233,12 +235,14 @@ psMarkSweep.cpp referenceProcessor.hpp psMarkSweep.cpp safepoint.hpp psMarkSweep.cpp spaceDecorator.hpp +psMarkSweep.cpp stack.inline.hpp psMarkSweep.cpp symbolTable.hpp psMarkSweep.cpp systemDictionary.hpp psMarkSweep.cpp vmThread.hpp psMarkSweep.hpp markSweep.inline.hpp psMarkSweep.hpp collectorCounters.hpp +psMarkSweep.hpp stack.hpp psMarkSweepDecorator.cpp liveRange.hpp psMarkSweepDecorator.cpp markSweep.inline.hpp @@ -280,6 +284,7 @@ psParallelCompact.cpp referencePolicy.hpp psParallelCompact.cpp referenceProcessor.hpp psParallelCompact.cpp safepoint.hpp +psParallelCompact.cpp stack.inline.hpp psParallelCompact.cpp symbolTable.hpp psParallelCompact.cpp systemDictionary.hpp psParallelCompact.cpp vmThread.hpp @@ -367,6 +372,7 @@ psScavenge.cpp referenceProcessor.hpp psScavenge.cpp resourceArea.hpp psScavenge.cpp spaceDecorator.hpp +psScavenge.cpp stack.inline.hpp psScavenge.cpp threadCritical.hpp psScavenge.cpp vmThread.hpp psScavenge.cpp vm_operations.hpp @@ -376,6 +382,7 @@ psScavenge.hpp collectorCounters.hpp psScavenge.hpp oop.hpp psScavenge.hpp psVirtualspace.hpp +psScavenge.hpp stack.hpp psScavenge.inline.hpp cardTableExtension.hpp psScavenge.inline.hpp parallelScavengeHeap.hpp diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/includeDB_gc_serial --- a/src/share/vm/gc_implementation/includeDB_gc_serial Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/includeDB_gc_serial Fri Oct 08 09:29:09 2010 -0700 @@ -93,11 +93,13 @@ markSweep.hpp growableArray.hpp markSweep.hpp markOop.hpp markSweep.hpp oop.hpp +markSweep.hpp stack.hpp markSweep.hpp timer.hpp markSweep.hpp universe.hpp markSweep.inline.hpp collectedHeap.hpp markSweep.inline.hpp markSweep.hpp +markSweep.inline.hpp stack.inline.hpp mutableSpace.hpp immutableSpace.hpp mutableSpace.hpp memRegion.hpp diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp --- a/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2010 Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -44,7 +44,7 @@ int n_strides = n_threads * StridesPerThread; SequentialSubTasksDone* pst = sp->par_seq_tasks(); - pst->set_par_threads(n_threads); + pst->set_n_threads(n_threads); pst->set_n_tasks(n_strides); int stride = 0; diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/parNew/parNewGeneration.cpp --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -34,12 +34,12 @@ Generation* old_gen_, int thread_num_, ObjToScanQueueSet* work_queue_set_, - GrowableArray** overflow_stack_set_, + Stack* overflow_stacks_, size_t desired_plab_sz_, ParallelTaskTerminator& term_) : _to_space(to_space_), _old_gen(old_gen_), _young_gen(gen_), _thread_num(thread_num_), _work_queue(work_queue_set_->queue(thread_num_)), _to_space_full(false), - _overflow_stack(overflow_stack_set_[thread_num_]), + _overflow_stack(overflow_stacks_ ? overflow_stacks_ + thread_num_ : NULL), _ageTable(false), // false ==> not the global age table, no perf data. _to_space_alloc_buffer(desired_plab_sz_), _to_space_closure(gen_, this), _old_gen_closure(gen_, this), @@ -159,11 +159,12 @@ assert(ParGCUseLocalOverflow, "Else should not call"); assert(young_gen()->overflow_list() == NULL, "Error"); ObjToScanQueue* queue = work_queue(); - GrowableArray* of_stack = overflow_stack(); - uint num_overflow_elems = of_stack->length(); - uint num_take_elems = MIN2(MIN2((queue->max_elems() - queue->size())/4, - (juint)ParGCDesiredObjsFromOverflowList), - num_overflow_elems); + Stack* const of_stack = overflow_stack(); + const size_t num_overflow_elems = of_stack->size(); + const size_t space_available = queue->max_elems() - queue->size(); + const size_t num_take_elems = MIN3(space_available / 4, + ParGCDesiredObjsFromOverflowList, + num_overflow_elems); // Transfer the most recent num_take_elems from the overflow // stack to our work queue. for (size_t i = 0; i != num_take_elems; i++) { @@ -271,7 +272,7 @@ ParNewGeneration& gen, Generation& old_gen, ObjToScanQueueSet& queue_set, - GrowableArray** overflow_stacks_, + Stack* overflow_stacks_, size_t desired_plab_sz, ParallelTaskTerminator& term); @@ -302,17 +303,19 @@ ParScanThreadStateSet::ParScanThreadStateSet( int num_threads, Space& to_space, ParNewGeneration& gen, Generation& old_gen, ObjToScanQueueSet& queue_set, - GrowableArray** overflow_stack_set_, + Stack* overflow_stacks, size_t desired_plab_sz, ParallelTaskTerminator& term) : ResourceArray(sizeof(ParScanThreadState), num_threads), _gen(gen), _next_gen(old_gen), _term(term) { assert(num_threads > 0, "sanity check!"); + assert(ParGCUseLocalOverflow == (overflow_stacks != NULL), + "overflow_stack allocation mismatch"); // Initialize states. for (int i = 0; i < num_threads; ++i) { new ((ParScanThreadState*)_data + i) ParScanThreadState(&to_space, &gen, &old_gen, i, &queue_set, - overflow_stack_set_, desired_plab_sz, term); + overflow_stacks, desired_plab_sz, term); } } @@ -596,14 +599,11 @@ for (uint i2 = 0; i2 < ParallelGCThreads; i2++) _task_queues->queue(i2)->initialize(); - _overflow_stacks = NEW_C_HEAP_ARRAY(GrowableArray*, ParallelGCThreads); - guarantee(_overflow_stacks != NULL, "Overflow stack set allocation failure"); - for (uint i = 0; i < ParallelGCThreads; i++) { - if (ParGCUseLocalOverflow) { - _overflow_stacks[i] = new (ResourceObj::C_HEAP) GrowableArray(512, true); - guarantee(_overflow_stacks[i] != NULL, "Overflow Stack allocation failure."); - } else { - _overflow_stacks[i] = NULL; + _overflow_stacks = NULL; + if (ParGCUseLocalOverflow) { + _overflow_stacks = NEW_C_HEAP_ARRAY(Stack, ParallelGCThreads); + for (size_t i = 0; i < ParallelGCThreads; ++i) { + new (_overflow_stacks + i) Stack(); } } @@ -937,12 +937,9 @@ } else { assert(HandlePromotionFailure, "Should only be here if promotion failure handling is on"); - if (_promo_failure_scan_stack != NULL) { - // Can be non-null because of reference processing. - // Free stack with its elements. - delete _promo_failure_scan_stack; - _promo_failure_scan_stack = NULL; - } + assert(_promo_failure_scan_stack.is_empty(), "post condition"); + _promo_failure_scan_stack.clear(true); // Clear cached segments. + remove_forwarding_pointers(); if (PrintGCDetails) { gclog_or_tty->print(" (promotion failed)"); @@ -1397,8 +1394,8 @@ size_t objsFromOverflow = MIN2((size_t)(work_q->max_elems() - work_q->size())/4, (size_t)ParGCDesiredObjsFromOverflowList); + assert(!UseCompressedOops, "Error"); assert(par_scan_state->overflow_stack() == NULL, "Error"); - assert(!UseCompressedOops, "Error"); if (_overflow_list == NULL) return false; // Otherwise, there was something there; try claiming the list. @@ -1533,3 +1530,7 @@ const char* ParNewGeneration::name() const { return "par new generation"; } + +bool ParNewGeneration::in_use() { + return UseParNewGC && ParallelGCThreads > 0; +} diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/parNew/parNewGeneration.hpp --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -52,7 +52,7 @@ friend class ParScanThreadStateSet; private: ObjToScanQueue *_work_queue; - GrowableArray* _overflow_stack; + Stack* const _overflow_stack; ParGCAllocBuffer _to_space_alloc_buffer; @@ -120,7 +120,7 @@ ParScanThreadState(Space* to_space_, ParNewGeneration* gen_, Generation* old_gen_, int thread_num_, ObjToScanQueueSet* work_queue_set_, - GrowableArray** overflow_stack_set_, + Stack* overflow_stacks_, size_t desired_plab_sz_, ParallelTaskTerminator& term_); @@ -144,7 +144,7 @@ void trim_queues(int max_size); // Private overflow stack usage - GrowableArray* overflow_stack() { return _overflow_stack; } + Stack* overflow_stack() { return _overflow_stack; } bool take_from_overflow_stack(); void push_on_overflow_stack(oop p); @@ -301,7 +301,7 @@ ObjToScanQueueSet* _task_queues; // Per-worker-thread local overflow stacks - GrowableArray** _overflow_stacks; + Stack* _overflow_stacks; // Desired size of survivor space plab's PLABStats _plab_stats; @@ -350,6 +350,8 @@ delete _task_queues; } + static bool in_use(); + virtual void ref_processor_init(); virtual Generation::Name kind() { return Generation::ParNew; } virtual const char* name() const; diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -59,8 +59,6 @@ PrintGCDetails && TraceParallelOldGCTasks, true, gclog_or_tty)); ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(which); - assert(cm->stacks_have_been_allocated(), - "Stack space has not been allocated"); PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm); switch (_root_type) { @@ -119,7 +117,6 @@ // Do the real work cm->follow_marking_stacks(); - // cm->deallocate_stacks(); } @@ -135,8 +132,6 @@ PrintGCDetails && TraceParallelOldGCTasks, true, gclog_or_tty)); ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(which); - assert(cm->stacks_have_been_allocated(), - "Stack space has not been allocated"); PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm); PSParallelCompact::FollowStackClosure follow_stack_closure(cm); _rp_task.work(_work_id, *PSParallelCompact::is_alive_closure(), diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2008, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -242,7 +242,11 @@ // class DrainStacksCompactionTask : public GCTask { + uint _stack_index; + uint stack_index() { return _stack_index; } public: + DrainStacksCompactionTask(uint stack_index) : GCTask(), + _stack_index(stack_index) {}; char* name() { return (char *)"drain-region-task"; } virtual void do_it(GCTaskManager* manager, uint which); }; diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -46,23 +46,6 @@ marking_stack()->initialize(); _objarray_stack.initialize(); region_stack()->initialize(); - - // Note that _revisit_klass_stack is allocated out of the - // C heap (as opposed to out of ResourceArena). - int size = - (SystemDictionary::number_of_classes() * 2) * 2 / ParallelGCThreads; - _revisit_klass_stack = new (ResourceObj::C_HEAP) GrowableArray(size, true); - // From some experiments (#klass/k)^2 for k = 10 seems a better fit, but this will - // have to do for now until we are able to investigate a more optimal setting. - _revisit_mdo_stack = new (ResourceObj::C_HEAP) GrowableArray(size*2, true); -} - -ParCompactionManager::~ParCompactionManager() { - delete _revisit_klass_stack; - delete _revisit_mdo_stack; - // _manager_array and _stack_array are statics - // shared with all instances of ParCompactionManager - // should not be deallocated. } void ParCompactionManager::initialize(ParMarkBitMap* mbm) { @@ -134,9 +117,9 @@ } void ParCompactionManager::reset() { - for(uint i=0; irevisit_klass_stack()->clear(); - manager_array(i)->revisit_mdo_stack()->clear(); + for(uint i = 0; i < ParallelGCThreads + 1; i++) { + assert(manager_array(i)->revisit_klass_stack()->is_empty(), "sanity"); + assert(manager_array(i)->revisit_mdo_stack()->is_empty(), "sanity"); } } @@ -178,10 +161,3 @@ } } while (!region_stack()->is_empty()); } - -#ifdef ASSERT -bool ParCompactionManager::stacks_have_been_allocated() { - return (revisit_klass_stack()->data_addr() != NULL && - revisit_mdo_stack()->data_addr() != NULL); -} -#endif diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -80,10 +80,9 @@ // type of TaskQueue. RegionTaskQueue _region_stack; -#if 1 // does this happen enough to need a per thread stack? - GrowableArray* _revisit_klass_stack; - GrowableArray* _revisit_mdo_stack; -#endif + Stack _revisit_klass_stack; + Stack _revisit_mdo_stack; + static ParMarkBitMap* _mark_bitmap; Action _action; @@ -113,10 +112,7 @@ inline static ParCompactionManager* manager_array(int index); ParCompactionManager(); - ~ParCompactionManager(); - void allocate_stacks(); - void deallocate_stacks(); ParMarkBitMap* mark_bitmap() { return _mark_bitmap; } // Take actions in preparation for a compaction. @@ -129,11 +125,8 @@ bool should_verify_only(); bool should_reset_only(); -#if 1 - // Probably stays as a growable array - GrowableArray* revisit_klass_stack() { return _revisit_klass_stack; } - GrowableArray* revisit_mdo_stack() { return _revisit_mdo_stack; } -#endif + Stack* revisit_klass_stack() { return &_revisit_klass_stack; } + Stack* revisit_mdo_stack() { return &_revisit_mdo_stack; } // Save for later processing. Must not fail. inline void push(oop obj) { _marking_stack.push(obj); } @@ -162,10 +155,6 @@ // Process tasks remaining on any stack void drain_region_stacks(); - // Debugging support -#ifdef ASSERT - bool stacks_have_been_allocated(); -#endif }; inline ParCompactionManager* ParCompactionManager::manager_array(int index) { diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -466,33 +466,16 @@ _preserved_count_max = pointer_delta(to_space->end(), to_space->top(), sizeof(jbyte)); // Now divide by the size of a PreservedMark _preserved_count_max /= sizeof(PreservedMark); - - _preserved_mark_stack = NULL; - _preserved_oop_stack = NULL; - - _marking_stack = new (ResourceObj::C_HEAP) GrowableArray(4000, true); - _objarray_stack = new (ResourceObj::C_HEAP) GrowableArray(50, true); - - int size = SystemDictionary::number_of_classes() * 2; - _revisit_klass_stack = new (ResourceObj::C_HEAP) GrowableArray(size, true); - // (#klass/k)^2, for k ~ 10 appears a better setting, but this will have to do for - // now until we investigate a more optimal setting. - _revisit_mdo_stack = new (ResourceObj::C_HEAP) GrowableArray(size*2, true); } void PSMarkSweep::deallocate_stacks() { - if (_preserved_oop_stack) { - delete _preserved_mark_stack; - _preserved_mark_stack = NULL; - delete _preserved_oop_stack; - _preserved_oop_stack = NULL; - } - - delete _marking_stack; - delete _objarray_stack; - delete _revisit_klass_stack; - delete _revisit_mdo_stack; + _preserved_mark_stack.clear(true); + _preserved_oop_stack.clear(true); + _marking_stack.clear(); + _objarray_stack.clear(true); + _revisit_klass_stack.clear(true); + _revisit_mdo_stack.clear(true); } void PSMarkSweep::mark_sweep_phase1(bool clear_all_softrefs) { @@ -542,17 +525,17 @@ // Update subklass/sibling/implementor links of live klasses follow_weak_klass_links(); - assert(_marking_stack->is_empty(), "just drained"); + assert(_marking_stack.is_empty(), "just drained"); // Visit memoized mdo's and clear unmarked weak refs follow_mdo_weak_refs(); - assert(_marking_stack->is_empty(), "just drained"); + assert(_marking_stack.is_empty(), "just drained"); // Visit symbol and interned string tables and delete unmarked oops SymbolTable::unlink(is_alive_closure()); StringTable::unlink(is_alive_closure()); - assert(_marking_stack->is_empty(), "stack should be empty by now"); + assert(_marking_stack.is_empty(), "stack should be empty by now"); } diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -2170,6 +2170,16 @@ heap->update_counters(); } +#ifdef ASSERT + for (size_t i = 0; i < ParallelGCThreads + 1; ++i) { + ParCompactionManager* const cm = + ParCompactionManager::manager_array(int(i)); + assert(cm->marking_stack()->is_empty(), "should be empty"); + assert(cm->region_stack()->is_empty(), "should be empty"); + assert(cm->revisit_klass_stack()->is_empty(), "should be empty"); + } +#endif // ASSERT + if (VerifyAfterGC && heap->total_collections() >= VerifyGCStartAt) { HandleMark hm; // Discard invalid handles created during verification gclog_or_tty->print(" VerifyAfterGC:"); @@ -2449,7 +2459,7 @@ const unsigned int task_count = MAX2(parallel_gc_threads, 1U); for (unsigned int j = 0; j < task_count; j++) { - q->enqueue(new DrainStacksCompactionTask()); + q->enqueue(new DrainStacksCompactionTask(j)); } // Find all regions that are available (can be filled immediately) and @@ -2711,21 +2721,22 @@ // All klasses on the revisit stack are marked at this point. // Update and follow all subklass, sibling and implementor links. if (PrintRevisitStats) { - gclog_or_tty->print_cr("#classes in system dictionary = %d", SystemDictionary::number_of_classes()); + gclog_or_tty->print_cr("#classes in system dictionary = %d", + SystemDictionary::number_of_classes()); } for (uint i = 0; i < ParallelGCThreads + 1; i++) { ParCompactionManager* cm = ParCompactionManager::manager_array(i); KeepAliveClosure keep_alive_closure(cm); - int length = cm->revisit_klass_stack()->length(); + Stack* const rks = cm->revisit_klass_stack(); if (PrintRevisitStats) { - gclog_or_tty->print_cr("Revisit klass stack[%d] length = %d", i, length); + gclog_or_tty->print_cr("Revisit klass stack[%u] length = " SIZE_FORMAT, + i, rks->size()); } - for (int j = 0; j < length; j++) { - cm->revisit_klass_stack()->at(j)->follow_weak_klass_links( - is_alive_closure(), - &keep_alive_closure); + while (!rks->is_empty()) { + Klass* const k = rks->pop(); + k->follow_weak_klass_links(is_alive_closure(), &keep_alive_closure); } - // revisit_klass_stack is cleared in reset() + cm->follow_marking_stacks(); } } @@ -2744,19 +2755,20 @@ // we can visit and clear any weak references from MDO's which // we memoized during the strong marking phase. if (PrintRevisitStats) { - gclog_or_tty->print_cr("#classes in system dictionary = %d", SystemDictionary::number_of_classes()); + gclog_or_tty->print_cr("#classes in system dictionary = %d", + SystemDictionary::number_of_classes()); } for (uint i = 0; i < ParallelGCThreads + 1; i++) { ParCompactionManager* cm = ParCompactionManager::manager_array(i); - GrowableArray* rms = cm->revisit_mdo_stack(); - int length = rms->length(); + Stack* rms = cm->revisit_mdo_stack(); if (PrintRevisitStats) { - gclog_or_tty->print_cr("Revisit MDO stack[%d] length = %d", i, length); + gclog_or_tty->print_cr("Revisit MDO stack[%u] size = " SIZE_FORMAT, + i, rms->size()); } - for (int j = 0; j < length; j++) { - rms->at(j)->follow_weak_refs(is_alive_closure()); + while (!rms->is_empty()) { + rms->pop()->follow_weak_refs(is_alive_closure()); } - // revisit_mdo_stack is cleared in reset() + cm->follow_marking_stacks(); } } diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -185,7 +185,6 @@ void PSPromotionManager::drain_stacks_depth(bool totally_drain) { - assert(claimed_stack_depth()->overflow_stack() != NULL, "invariant"); totally_drain = totally_drain || _totally_drain; #ifdef ASSERT diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -34,9 +34,10 @@ int PSScavenge::_tenuring_threshold = 0; HeapWord* PSScavenge::_young_generation_boundary = NULL; elapsedTimer PSScavenge::_accumulated_time; -GrowableArray* PSScavenge::_preserved_mark_stack = NULL; -GrowableArray* PSScavenge::_preserved_oop_stack = NULL; +Stack PSScavenge::_preserved_mark_stack; +Stack PSScavenge::_preserved_oop_stack; CollectorCounters* PSScavenge::_counters = NULL; +bool PSScavenge::_promotion_failed = false; // Define before use class PSIsAliveClosure: public BoolObjectClosure { @@ -223,6 +224,9 @@ assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint"); assert(Thread::current() == (Thread*)VMThread::vm_thread(), "should be in vm thread"); + assert(_preserved_mark_stack.is_empty(), "should be empty"); + assert(_preserved_oop_stack.is_empty(), "should be empty"); + TimeStamp scavenge_entry; TimeStamp scavenge_midpoint; TimeStamp scavenge_exit; @@ -636,24 +640,20 @@ young_gen->object_iterate(&unforward_closure); if (PrintGC && Verbose) { - gclog_or_tty->print_cr("Restoring %d marks", - _preserved_oop_stack->length()); + gclog_or_tty->print_cr("Restoring %d marks", _preserved_oop_stack.size()); } // Restore any saved marks. - for (int i=0; i < _preserved_oop_stack->length(); i++) { - oop obj = _preserved_oop_stack->at(i); - markOop mark = _preserved_mark_stack->at(i); + while (!_preserved_oop_stack.is_empty()) { + oop obj = _preserved_oop_stack.pop(); + markOop mark = _preserved_mark_stack.pop(); obj->set_mark(mark); } - // Deallocate the preserved mark and oop stacks. - // The stacks were allocated as CHeap objects, so - // we must call delete to prevent mem leaks. - delete _preserved_mark_stack; - _preserved_mark_stack = NULL; - delete _preserved_oop_stack; - _preserved_oop_stack = NULL; + // Clear the preserved mark and oop stack caches. + _preserved_mark_stack.clear(true); + _preserved_oop_stack.clear(true); + _promotion_failed = false; } // Reset the PromotionFailureALot counters. @@ -661,27 +661,16 @@ } // This method is called whenever an attempt to promote an object -// fails. Some markOops will need preserving, some will not. Note +// fails. Some markOops will need preservation, some will not. Note // that the entire eden is traversed after a failed promotion, with // all forwarded headers replaced by the default markOop. This means // it is not neccessary to preserve most markOops. void PSScavenge::oop_promotion_failed(oop obj, markOop obj_mark) { - if (_preserved_mark_stack == NULL) { - ThreadCritical tc; // Lock and retest - if (_preserved_mark_stack == NULL) { - assert(_preserved_oop_stack == NULL, "Sanity"); - _preserved_mark_stack = new (ResourceObj::C_HEAP) GrowableArray(40, true); - _preserved_oop_stack = new (ResourceObj::C_HEAP) GrowableArray(40, true); - } - } - - // Because we must hold the ThreadCritical lock before using - // the stacks, we should be safe from observing partial allocations, - // which are also guarded by the ThreadCritical lock. + _promotion_failed = true; if (obj_mark->must_be_preserved_for_promotion_failure(obj)) { ThreadCritical tc; - _preserved_oop_stack->push(obj); - _preserved_mark_stack->push(obj_mark); + _preserved_oop_stack.push(obj); + _preserved_mark_stack.push(obj_mark); } } diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/parallelScavenge/psScavenge.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -61,9 +61,10 @@ static HeapWord* _young_generation_boundary; // The lowest address possible for the young_gen. // This is used to decide if an oop should be scavenged, // cards should be marked, etc. - static GrowableArray* _preserved_mark_stack; // List of marks to be restored after failed promotion - static GrowableArray* _preserved_oop_stack; // List of oops that need their mark restored. + static Stack _preserved_mark_stack; // List of marks to be restored after failed promotion + static Stack _preserved_oop_stack; // List of oops that need their mark restored. static CollectorCounters* _counters; // collector performance counters + static bool _promotion_failed; static void clean_up_failed_promotion(); @@ -79,8 +80,7 @@ // Accessors static int tenuring_threshold() { return _tenuring_threshold; } static elapsedTimer* accumulated_time() { return &_accumulated_time; } - static bool promotion_failed() - { return _preserved_mark_stack != NULL; } + static bool promotion_failed() { return _promotion_failed; } static int consecutive_skipped_scavenges() { return _consecutive_skipped_scavenges; } diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/shared/concurrentGCThread.cpp --- a/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -185,7 +185,7 @@ instanceKlassHandle klass (THREAD, k); instanceHandle thread_oop = klass->allocate_instance_handle(CHECK_NULL); - const char thread_name[] = "Surrogate Locker Thread (CMS)"; + const char thread_name[] = "Surrogate Locker Thread (Concurrent GC)"; Handle string = java_lang_String::create_from_str(thread_name, CHECK_NULL); // Initialize thread_oop to put it into the system threadGroup diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/shared/markSweep.cpp --- a/src/share/vm/gc_implementation/shared/markSweep.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/shared/markSweep.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -25,13 +25,13 @@ #include "incls/_precompiled.incl" #include "incls/_markSweep.cpp.incl" -GrowableArray* MarkSweep::_marking_stack = NULL; -GrowableArray* MarkSweep::_objarray_stack = NULL; -GrowableArray* MarkSweep::_revisit_klass_stack = NULL; -GrowableArray* MarkSweep::_revisit_mdo_stack = NULL; +Stack MarkSweep::_marking_stack; +Stack MarkSweep::_revisit_mdo_stack; +Stack MarkSweep::_revisit_klass_stack; +Stack MarkSweep::_objarray_stack; -GrowableArray* MarkSweep::_preserved_oop_stack = NULL; -GrowableArray* MarkSweep::_preserved_mark_stack= NULL; +Stack MarkSweep::_preserved_oop_stack; +Stack MarkSweep::_preserved_mark_stack; size_t MarkSweep::_preserved_count = 0; size_t MarkSweep::_preserved_count_max = 0; PreservedMark* MarkSweep::_preserved_marks = NULL; @@ -58,37 +58,42 @@ #endif void MarkSweep::revisit_weak_klass_link(Klass* k) { - _revisit_klass_stack->push(k); + _revisit_klass_stack.push(k); } void MarkSweep::follow_weak_klass_links() { // All klasses on the revisit stack are marked at this point. // Update and follow all subklass, sibling and implementor links. if (PrintRevisitStats) { - gclog_or_tty->print_cr("#classes in system dictionary = %d", SystemDictionary::number_of_classes()); - gclog_or_tty->print_cr("Revisit klass stack length = %d", _revisit_klass_stack->length()); + gclog_or_tty->print_cr("#classes in system dictionary = %d", + SystemDictionary::number_of_classes()); + gclog_or_tty->print_cr("Revisit klass stack size = " SIZE_FORMAT, + _revisit_klass_stack.size()); } - for (int i = 0; i < _revisit_klass_stack->length(); i++) { - _revisit_klass_stack->at(i)->follow_weak_klass_links(&is_alive,&keep_alive); + while (!_revisit_klass_stack.is_empty()) { + Klass* const k = _revisit_klass_stack.pop(); + k->follow_weak_klass_links(&is_alive, &keep_alive); } follow_stack(); } void MarkSweep::revisit_mdo(DataLayout* p) { - _revisit_mdo_stack->push(p); + _revisit_mdo_stack.push(p); } void MarkSweep::follow_mdo_weak_refs() { // All strongly reachable oops have been marked at this point; // we can visit and clear any weak references from MDO's which // we memoized during the strong marking phase. - assert(_marking_stack->is_empty(), "Marking stack should be empty"); + assert(_marking_stack.is_empty(), "Marking stack should be empty"); if (PrintRevisitStats) { - gclog_or_tty->print_cr("#classes in system dictionary = %d", SystemDictionary::number_of_classes()); - gclog_or_tty->print_cr("Revisit MDO stack length = %d", _revisit_mdo_stack->length()); + gclog_or_tty->print_cr("#classes in system dictionary = %d", + SystemDictionary::number_of_classes()); + gclog_or_tty->print_cr("Revisit MDO stack size = " SIZE_FORMAT, + _revisit_mdo_stack.size()); } - for (int i = 0; i < _revisit_mdo_stack->length(); i++) { - _revisit_mdo_stack->at(i)->follow_weak_refs(&is_alive); + while (!_revisit_mdo_stack.is_empty()) { + _revisit_mdo_stack.pop()->follow_weak_refs(&is_alive); } follow_stack(); } @@ -106,41 +111,37 @@ void MarkSweep::follow_stack() { do { - while (!_marking_stack->is_empty()) { - oop obj = _marking_stack->pop(); + while (!_marking_stack.is_empty()) { + oop obj = _marking_stack.pop(); assert (obj->is_gc_marked(), "p must be marked"); obj->follow_contents(); } // Process ObjArrays one at a time to avoid marking stack bloat. - if (!_objarray_stack->is_empty()) { - ObjArrayTask task = _objarray_stack->pop(); + if (!_objarray_stack.is_empty()) { + ObjArrayTask task = _objarray_stack.pop(); objArrayKlass* const k = (objArrayKlass*)task.obj()->blueprint(); k->oop_follow_contents(task.obj(), task.index()); } - } while (!_marking_stack->is_empty() || !_objarray_stack->is_empty()); + } while (!_marking_stack.is_empty() || !_objarray_stack.is_empty()); } MarkSweep::FollowStackClosure MarkSweep::follow_stack_closure; void MarkSweep::FollowStackClosure::do_void() { follow_stack(); } -// We preserve the mark which should be replaced at the end and the location that it -// will go. Note that the object that this markOop belongs to isn't currently at that -// address but it will be after phase4 +// We preserve the mark which should be replaced at the end and the location +// that it will go. Note that the object that this markOop belongs to isn't +// currently at that address but it will be after phase4 void MarkSweep::preserve_mark(oop obj, markOop mark) { - // we try to store preserved marks in the to space of the new generation since this - // is storage which should be available. Most of the time this should be sufficient - // space for the marks we need to preserve but if it isn't we fall back in using - // GrowableArrays to keep track of the overflow. + // We try to store preserved marks in the to space of the new generation since + // this is storage which should be available. Most of the time this should be + // sufficient space for the marks we need to preserve but if it isn't we fall + // back to using Stacks to keep track of the overflow. if (_preserved_count < _preserved_count_max) { _preserved_marks[_preserved_count++].init(obj, mark); } else { - if (_preserved_mark_stack == NULL) { - _preserved_mark_stack = new (ResourceObj::C_HEAP) GrowableArray(40, true); - _preserved_oop_stack = new (ResourceObj::C_HEAP) GrowableArray(40, true); - } - _preserved_mark_stack->push(mark); - _preserved_oop_stack->push(obj); + _preserved_mark_stack.push(mark); + _preserved_oop_stack.push(obj); } } @@ -151,8 +152,7 @@ void MarkSweep::AdjustPointerClosure::do_oop(narrowOop* p) { adjust_pointer(p, _is_root); } void MarkSweep::adjust_marks() { - assert(_preserved_oop_stack == NULL || - _preserved_oop_stack->length() == _preserved_mark_stack->length(), + assert( _preserved_oop_stack.size() == _preserved_mark_stack.size(), "inconsistent preserved oop stacks"); // adjust the oops we saved earlier @@ -161,21 +161,19 @@ } // deal with the overflow stack - if (_preserved_oop_stack) { - for (int i = 0; i < _preserved_oop_stack->length(); i++) { - oop* p = _preserved_oop_stack->adr_at(i); - adjust_pointer(p); - } + StackIterator iter(_preserved_oop_stack); + while (!iter.is_empty()) { + oop* p = iter.next_addr(); + adjust_pointer(p); } } void MarkSweep::restore_marks() { - assert(_preserved_oop_stack == NULL || - _preserved_oop_stack->length() == _preserved_mark_stack->length(), + assert(_preserved_oop_stack.size() == _preserved_mark_stack.size(), "inconsistent preserved oop stacks"); if (PrintGC && Verbose) { - gclog_or_tty->print_cr("Restoring %d marks", _preserved_count + - (_preserved_oop_stack ? _preserved_oop_stack->length() : 0)); + gclog_or_tty->print_cr("Restoring %d marks", + _preserved_count + _preserved_oop_stack.size()); } // restore the marks we saved earlier @@ -184,12 +182,10 @@ } // deal with the overflow - if (_preserved_oop_stack) { - for (int i = 0; i < _preserved_oop_stack->length(); i++) { - oop obj = _preserved_oop_stack->at(i); - markOop mark = _preserved_mark_stack->at(i); - obj->set_mark(mark); - } + while (!_preserved_oop_stack.is_empty()) { + oop obj = _preserved_oop_stack.pop(); + markOop mark = _preserved_mark_stack.pop(); + obj->set_mark(mark); } } diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/shared/markSweep.hpp --- a/src/share/vm/gc_implementation/shared/markSweep.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/shared/markSweep.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -104,23 +104,22 @@ friend class KeepAliveClosure; friend class VM_MarkSweep; friend void marksweep_init(); - friend class DataLayout; // // Vars // protected: // Traversal stacks used during phase1 - static GrowableArray* _marking_stack; - static GrowableArray* _objarray_stack; + static Stack _marking_stack; + static Stack _objarray_stack; // Stack for live klasses to revisit at end of marking phase - static GrowableArray* _revisit_klass_stack; + static Stack _revisit_klass_stack; // Set (stack) of MDO's to revisit at end of marking phase - static GrowableArray* _revisit_mdo_stack; + static Stack _revisit_mdo_stack; // Space for storing/restoring mark word - static GrowableArray* _preserved_mark_stack; - static GrowableArray* _preserved_oop_stack; + static Stack _preserved_mark_stack; + static Stack _preserved_oop_stack; static size_t _preserved_count; static size_t _preserved_count_max; static PreservedMark* _preserved_marks; diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_implementation/shared/markSweep.inline.hpp --- a/src/share/vm/gc_implementation/shared/markSweep.inline.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_implementation/shared/markSweep.inline.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -72,7 +72,7 @@ oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); if (!obj->mark()->is_marked()) { mark_object(obj); - _marking_stack->push(obj); + _marking_stack.push(obj); } } } @@ -80,7 +80,7 @@ void MarkSweep::push_objarray(oop obj, size_t index) { ObjArrayTask task(obj, index); assert(task.is_valid(), "bad ObjArrayTask"); - _objarray_stack->push(task); + _objarray_stack.push(task); } template inline void MarkSweep::adjust_pointer(T* p, bool isroot) { diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_interface/collectedHeap.cpp --- a/src/share/vm/gc_interface/collectedHeap.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,7 +34,9 @@ // Memory state functions. -CollectedHeap::CollectedHeap() + +CollectedHeap::CollectedHeap() : _n_par_threads(0) + { const size_t max_len = size_t(arrayOopDesc::max_array_length(T_INT)); const size_t elements_per_word = HeapWordSize / sizeof(jint); diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/gc_interface/collectedHeap.hpp --- a/src/share/vm/gc_interface/collectedHeap.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -59,6 +59,8 @@ MemRegion _reserved; BarrierSet* _barrier_set; bool _is_gc_active; + int _n_par_threads; + unsigned int _total_collections; // ... started unsigned int _total_full_collections; // ... started NOT_PRODUCT(volatile size_t _promotion_failure_alot_count;) @@ -293,6 +295,12 @@ } GCCause::Cause gc_cause() { return _gc_cause; } + // Number of threads currently working on GC tasks. + int n_par_threads() { return _n_par_threads; } + + // May be overridden to set additional parallelism. + virtual void set_par_threads(int t) { _n_par_threads = t; }; + // Preload classes into the shared portion of the heap, and then dump // that data to a file so that it can be loaded directly by another // VM (then terminate). @@ -606,6 +614,14 @@ return (CIFireOOMAt > 1 && _fire_out_of_memory_count >= CIFireOOMAt); } #endif + + public: + // This is a convenience method that is used in cases where + // the actual number of GC worker threads is not pertinent but + // only whether there more than 0. Use of this method helps + // reduce the occurrence of ParallelGCThreads to uses where the + // actual number may be germane. + static bool use_parallel_gc_threads() { return ParallelGCThreads > 0; } }; // Class to set and reset the GC cause for a CollectedHeap. diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/includeDB_core --- a/src/share/vm/includeDB_core Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/includeDB_core Fri Oct 08 09:29:09 2010 -0700 @@ -1437,12 +1437,14 @@ defNewGeneration.cpp referencePolicy.hpp defNewGeneration.cpp space.inline.hpp defNewGeneration.cpp spaceDecorator.hpp +defNewGeneration.cpp stack.inline.hpp defNewGeneration.cpp thread_.inline.hpp defNewGeneration.hpp ageTable.hpp defNewGeneration.hpp cSpaceCounters.hpp defNewGeneration.hpp generation.inline.hpp defNewGeneration.hpp generationCounters.hpp +defNewGeneration.hpp stack.hpp defNewGeneration.inline.hpp cardTableRS.hpp defNewGeneration.inline.hpp defNewGeneration.hpp @@ -3461,6 +3463,7 @@ permGen.hpp generation.hpp permGen.hpp handles.hpp permGen.hpp iterator.hpp +permGen.hpp mutexLocker.hpp permGen.hpp virtualspace.hpp placeholders.cpp fieldType.hpp @@ -3871,6 +3874,10 @@ specialized_oop_closures.hpp atomic.hpp +stack.hpp allocation.inline.hpp + +stack.inline.hpp stack.hpp + stackMapFrame.cpp globalDefinitions.hpp stackMapFrame.cpp handles.inline.hpp stackMapFrame.cpp oop.inline.hpp @@ -4115,6 +4122,7 @@ taskqueue.cpp debug.hpp taskqueue.cpp oop.inline.hpp taskqueue.cpp os.hpp +taskqueue.cpp stack.inline.hpp taskqueue.cpp taskqueue.hpp taskqueue.cpp thread_.inline.hpp @@ -4122,6 +4130,7 @@ taskqueue.hpp allocation.inline.hpp taskqueue.hpp mutex.hpp taskqueue.hpp orderAccess_.inline.hpp +taskqueue.hpp stack.hpp templateInterpreter.cpp interpreter.hpp templateInterpreter.cpp interpreterGenerator.hpp @@ -4741,6 +4750,7 @@ workgroup.cpp os.hpp workgroup.cpp workgroup.hpp +workgroup.hpp taskqueue.hpp workgroup.hpp thread_.inline.hpp xmlstream.cpp allocation.hpp diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/memory/allocation.hpp --- a/src/share/vm/memory/allocation.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/memory/allocation.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -289,16 +289,17 @@ // One of the following macros must be used when allocating // an array or object from an arena -#define NEW_ARENA_ARRAY(arena, type, size)\ - (type*) arena->Amalloc((size) * sizeof(type)) +#define NEW_ARENA_ARRAY(arena, type, size) \ + (type*) (arena)->Amalloc((size) * sizeof(type)) -#define REALLOC_ARENA_ARRAY(arena, type, old, old_size, new_size)\ - (type*) arena->Arealloc((char*)(old), (old_size) * sizeof(type), (new_size) * sizeof(type) ) +#define REALLOC_ARENA_ARRAY(arena, type, old, old_size, new_size) \ + (type*) (arena)->Arealloc((char*)(old), (old_size) * sizeof(type), \ + (new_size) * sizeof(type) ) -#define FREE_ARENA_ARRAY(arena, type, old, size)\ - arena->Afree((char*)(old), (size) * sizeof(type)) +#define FREE_ARENA_ARRAY(arena, type, old, size) \ + (arena)->Afree((char*)(old), (size) * sizeof(type)) -#define NEW_ARENA_OBJ(arena, type)\ +#define NEW_ARENA_OBJ(arena, type) \ NEW_ARENA_ARRAY(arena, type, 1) diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/memory/defNewGeneration.cpp --- a/src/share/vm/memory/defNewGeneration.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/memory/defNewGeneration.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -87,9 +87,7 @@ _gch->oop_since_save_marks_iterate(_level, _scan_cur_or_nonheap, _scan_older); } while (!_gch->no_allocs_since_save_marks(_level)); - guarantee(_gen->promo_failure_scan_stack() == NULL - || _gen->promo_failure_scan_stack()->length() == 0, - "Failed to finish scan"); + guarantee(_gen->promo_failure_scan_is_complete(), "Failed to finish scan"); } ScanClosure::ScanClosure(DefNewGeneration* g, bool gc_barrier) : @@ -130,9 +128,6 @@ int level, const char* policy) : Generation(rs, initial_size, level), - _objs_with_preserved_marks(NULL), - _preserved_marks_of_objs(NULL), - _promo_failure_scan_stack(NULL), _promo_failure_drain_in_progress(false), _should_allocate_from_space(false) { @@ -604,12 +599,8 @@ } else { assert(HandlePromotionFailure, "Should not be here unless promotion failure handling is on"); - assert(_promo_failure_scan_stack != NULL && - _promo_failure_scan_stack->length() == 0, "post condition"); - - // deallocate stack and it's elements - delete _promo_failure_scan_stack; - _promo_failure_scan_stack = NULL; + assert(_promo_failure_scan_stack.is_empty(), "post condition"); + _promo_failure_scan_stack.clear(true); // Clear cached segments. remove_forwarding_pointers(); if (PrintGCDetails) { @@ -620,7 +611,7 @@ // case there can be live objects in to-space // as a result of a partial evacuation of eden // and from-space. - swap_spaces(); // For the sake of uniformity wrt ParNewGeneration::collect(). + swap_spaces(); // For uniformity wrt ParNewGeneration. from()->set_next_compaction_space(to()); gch->set_incremental_collection_will_fail(); @@ -653,34 +644,23 @@ RemoveForwardPointerClosure rspc; eden()->object_iterate(&rspc); from()->object_iterate(&rspc); + // Now restore saved marks, if any. - if (_objs_with_preserved_marks != NULL) { - assert(_preserved_marks_of_objs != NULL, "Both or none."); - assert(_objs_with_preserved_marks->length() == - _preserved_marks_of_objs->length(), "Both or none."); - for (int i = 0; i < _objs_with_preserved_marks->length(); i++) { - oop obj = _objs_with_preserved_marks->at(i); - markOop m = _preserved_marks_of_objs->at(i); - obj->set_mark(m); - } - delete _objs_with_preserved_marks; - delete _preserved_marks_of_objs; - _objs_with_preserved_marks = NULL; - _preserved_marks_of_objs = NULL; + assert(_objs_with_preserved_marks.size() == _preserved_marks_of_objs.size(), + "should be the same"); + while (!_objs_with_preserved_marks.is_empty()) { + oop obj = _objs_with_preserved_marks.pop(); + markOop m = _preserved_marks_of_objs.pop(); + obj->set_mark(m); } + _objs_with_preserved_marks.clear(true); + _preserved_marks_of_objs.clear(true); } void DefNewGeneration::preserve_mark_if_necessary(oop obj, markOop m) { if (m->must_be_preserved_for_promotion_failure(obj)) { - if (_objs_with_preserved_marks == NULL) { - assert(_preserved_marks_of_objs == NULL, "Both or none."); - _objs_with_preserved_marks = new (ResourceObj::C_HEAP) - GrowableArray(PreserveMarkStackSize, true); - _preserved_marks_of_objs = new (ResourceObj::C_HEAP) - GrowableArray(PreserveMarkStackSize, true); - } - _objs_with_preserved_marks->push(obj); - _preserved_marks_of_objs->push(m); + _objs_with_preserved_marks.push(obj); + _preserved_marks_of_objs.push(m); } } @@ -695,7 +675,7 @@ old->forward_to(old); _promotion_failed = true; - push_on_promo_failure_scan_stack(old); + _promo_failure_scan_stack.push(old); if (!_promo_failure_drain_in_progress) { // prevent recursion in copy_to_survivor_space() @@ -748,20 +728,9 @@ return obj; } -void DefNewGeneration::push_on_promo_failure_scan_stack(oop obj) { - if (_promo_failure_scan_stack == NULL) { - _promo_failure_scan_stack = new (ResourceObj::C_HEAP) - GrowableArray(40, true); - } - - _promo_failure_scan_stack->push(obj); -} - void DefNewGeneration::drain_promo_failure_scan_stack() { - assert(_promo_failure_scan_stack != NULL, "precondition"); - - while (_promo_failure_scan_stack->length() > 0) { - oop obj = _promo_failure_scan_stack->pop(); + while (!_promo_failure_scan_stack.is_empty()) { + oop obj = _promo_failure_scan_stack.pop(); obj->oop_iterate(_promo_failure_scan_stack_closure); } } diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/memory/defNewGeneration.hpp --- a/src/share/vm/memory/defNewGeneration.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/memory/defNewGeneration.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -77,10 +77,10 @@ // word being overwritten with a self-forwarding-pointer. void preserve_mark_if_necessary(oop obj, markOop m); - // When one is non-null, so is the other. Together, they each pair is - // an object with a preserved mark, and its mark value. - GrowableArray* _objs_with_preserved_marks; - GrowableArray* _preserved_marks_of_objs; + // Together, these keep pairs. + // They should always contain the same number of elements. + Stack _objs_with_preserved_marks; + Stack _preserved_marks_of_objs; // Returns true if the collection can be safely attempted. // If this method returns false, a collection is not @@ -94,11 +94,7 @@ _promo_failure_scan_stack_closure = scan_stack_closure; } - GrowableArray* _promo_failure_scan_stack; - GrowableArray* promo_failure_scan_stack() const { - return _promo_failure_scan_stack; - } - void push_on_promo_failure_scan_stack(oop); + Stack _promo_failure_scan_stack; void drain_promo_failure_scan_stack(void); bool _promo_failure_drain_in_progress; @@ -184,8 +180,6 @@ void do_void(); }; - class FastEvacuateFollowersClosure; - friend class FastEvacuateFollowersClosure; class FastEvacuateFollowersClosure: public VoidClosure { GenCollectedHeap* _gch; int _level; @@ -336,6 +330,10 @@ void verify(bool allow_dirty); + bool promo_failure_scan_is_complete() const { + return _promo_failure_scan_stack.is_empty(); + } + protected: // If clear_space is true, clear the survivor spaces. Eden is // cleared if the minimum size of eden is 0. If mangle_space diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/memory/genCollectedHeap.cpp --- a/src/share/vm/memory/genCollectedHeap.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/memory/genCollectedHeap.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -676,7 +676,7 @@ void GenCollectedHeap::set_par_threads(int t) { SharedHeap::set_par_threads(t); - _gen_process_strong_tasks->set_par_threads(t); + _gen_process_strong_tasks->set_n_threads(t); } class AssertIsPermClosure: public OopClosure { diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/memory/genCollectedHeap.hpp --- a/src/share/vm/memory/genCollectedHeap.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/memory/genCollectedHeap.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -74,6 +74,7 @@ // Data structure for claiming the (potentially) parallel tasks in // (gen-specific) strong roots processing. SubTasksDone* _gen_process_strong_tasks; + SubTasksDone* gen_process_strong_tasks() { return _gen_process_strong_tasks; } // In block contents verification, the number of header words to skip NOT_PRODUCT(static size_t _skip_header_HeapWords;) diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/memory/genMarkSweep.cpp --- a/src/share/vm/memory/genMarkSweep.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/memory/genMarkSweep.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -161,17 +161,6 @@ _preserved_marks = (PreservedMark*)scratch; _preserved_count = 0; - _preserved_mark_stack = NULL; - _preserved_oop_stack = NULL; - - _marking_stack = new (ResourceObj::C_HEAP) GrowableArray(4000, true); - _objarray_stack = new (ResourceObj::C_HEAP) GrowableArray(50, true); - - int size = SystemDictionary::number_of_classes() * 2; - _revisit_klass_stack = new (ResourceObj::C_HEAP) GrowableArray(size, true); - // (#klass/k)^2 for k ~ 10 appears to be a better fit, but this will have to do for - // now until we have had a chance to investigate a more optimal setting. - _revisit_mdo_stack = new (ResourceObj::C_HEAP) GrowableArray(2*size, true); #ifdef VALIDATE_MARK_SWEEP if (ValidateMarkSweep) { @@ -206,17 +195,12 @@ gch->release_scratch(); } - if (_preserved_oop_stack) { - delete _preserved_mark_stack; - _preserved_mark_stack = NULL; - delete _preserved_oop_stack; - _preserved_oop_stack = NULL; - } - - delete _marking_stack; - delete _objarray_stack; - delete _revisit_klass_stack; - delete _revisit_mdo_stack; + _preserved_mark_stack.clear(true); + _preserved_oop_stack.clear(true); + _marking_stack.clear(); + _objarray_stack.clear(true); + _revisit_klass_stack.clear(true); + _revisit_mdo_stack.clear(true); #ifdef VALIDATE_MARK_SWEEP if (ValidateMarkSweep) { @@ -274,17 +258,17 @@ // Update subklass/sibling/implementor links of live klasses follow_weak_klass_links(); - assert(_marking_stack->is_empty(), "just drained"); + assert(_marking_stack.is_empty(), "just drained"); // Visit memoized MDO's and clear any unmarked weak refs follow_mdo_weak_refs(); - assert(_marking_stack->is_empty(), "just drained"); + assert(_marking_stack.is_empty(), "just drained"); // Visit symbol and interned string tables and delete unmarked oops SymbolTable::unlink(&is_alive); StringTable::unlink(&is_alive); - assert(_marking_stack->is_empty(), "stack should be empty by now"); + assert(_marking_stack.is_empty(), "stack should be empty by now"); } diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/memory/permGen.cpp --- a/src/share/vm/memory/permGen.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/memory/permGen.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,6 +25,17 @@ #include "incls/_precompiled.incl" #include "incls/_permGen.cpp.incl" +HeapWord* PermGen::request_expand_and_allocate(Generation* gen, size_t size, + GCCause::Cause prev_cause) { + if (gen->capacity() < _capacity_expansion_limit || + prev_cause != GCCause::_no_gc || UseG1GC) { // last disjunct is a temporary hack for G1 + return gen->expand_and_allocate(size, false); + } + // We have reached the limit of capacity expansion where + // we will not expand further until a GC is done; request denied. + return NULL; +} + HeapWord* PermGen::mem_allocate_in_gen(size_t size, Generation* gen) { GCCause::Cause next_cause = GCCause::_permanent_generation_full; GCCause::Cause prev_cause = GCCause::_no_gc; @@ -37,10 +48,14 @@ if ((obj = gen->allocate(size, false)) != NULL) { return obj; } - if (gen->capacity() < _capacity_expansion_limit || - prev_cause != GCCause::_no_gc) { - obj = gen->expand_and_allocate(size, false); - } + // Attempt to expand and allocate the requested space: + // specific subtypes may use specific policy to either expand + // or not. The default policy (see above) is to expand until + // _capacity_expansion_limit, and no further unless a GC is done. + // Concurrent collectors may decide to kick off a concurrent + // collection under appropriate conditions. + obj = request_expand_and_allocate(gen, size, prev_cause); + if (obj != NULL || prev_cause == GCCause::_last_ditch_collection) { return obj; } @@ -119,5 +134,5 @@ if (_gen->capacity() > desired_capacity) { _gen->shrink(_gen->capacity() - desired_capacity); } - _capacity_expansion_limit = _gen->capacity() + MaxPermHeapExpansion; + set_capacity_expansion_limit(_gen->capacity() + MaxPermHeapExpansion); } diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/memory/permGen.hpp --- a/src/share/vm/memory/permGen.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/memory/permGen.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,15 +30,26 @@ class GenRemSet; class CSpaceCounters; -// PermGen models the part of the heap +// PermGen models the part of the heap used to allocate class meta-data. class PermGen : public CHeapObj { friend class VMStructs; protected: size_t _capacity_expansion_limit; // maximum expansion allowed without a // full gc occurring + void set_capacity_expansion_limit(size_t limit) { + assert_locked_or_safepoint(Heap_lock); + _capacity_expansion_limit = limit; + } HeapWord* mem_allocate_in_gen(size_t size, Generation* gen); + // Along with mem_allocate_in_gen() above, implements policy for + // "scheduling" allocation/expansion/collection of the perm gen. + // The virtual method request_...() below can be overridden by + // subtypes that want to implement a different expansion/collection + // policy from the default provided. + virtual HeapWord* request_expand_and_allocate(Generation* gen, size_t size, + GCCause::Cause prev_cause); public: enum Name { diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/memory/referenceProcessor.cpp --- a/src/share/vm/memory/referenceProcessor.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/memory/referenceProcessor.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -137,16 +137,17 @@ _discovery_is_atomic = atomic_discovery; _discovery_is_mt = mt_discovery; _num_q = mt_degree; - _discoveredSoftRefs = NEW_C_HEAP_ARRAY(DiscoveredList, _num_q * subclasses_of_ref); + _max_num_q = mt_degree; + _discoveredSoftRefs = NEW_C_HEAP_ARRAY(DiscoveredList, _max_num_q * subclasses_of_ref); if (_discoveredSoftRefs == NULL) { vm_exit_during_initialization("Could not allocated RefProc Array"); } - _discoveredWeakRefs = &_discoveredSoftRefs[_num_q]; - _discoveredFinalRefs = &_discoveredWeakRefs[_num_q]; - _discoveredPhantomRefs = &_discoveredFinalRefs[_num_q]; + _discoveredWeakRefs = &_discoveredSoftRefs[_max_num_q]; + _discoveredFinalRefs = &_discoveredWeakRefs[_max_num_q]; + _discoveredPhantomRefs = &_discoveredFinalRefs[_max_num_q]; assert(sentinel_ref() != NULL, "_sentinelRef is NULL"); // Initialized all entries to _sentinelRef - for (int i = 0; i < _num_q * subclasses_of_ref; i++) { + for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { _discoveredSoftRefs[i].set_head(sentinel_ref()); _discoveredSoftRefs[i].set_length(0); } @@ -159,7 +160,7 @@ #ifndef PRODUCT void ReferenceProcessor::verify_no_references_recorded() { guarantee(!_discovering_refs, "Discovering refs?"); - for (int i = 0; i < _num_q * subclasses_of_ref; i++) { + for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { guarantee(_discoveredSoftRefs[i].empty(), "Found non-empty discovered list"); } @@ -167,7 +168,11 @@ #endif void ReferenceProcessor::weak_oops_do(OopClosure* f) { - for (int i = 0; i < _num_q * subclasses_of_ref; i++) { + // Should this instead be + // for (int i = 0; i < subclasses_of_ref; i++_ { + // for (int j = 0; j < _num_q; j++) { + // int index = i * _max_num_q + j; + for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { if (UseCompressedOops) { f->do_oop((narrowOop*)_discoveredSoftRefs[i].adr_head()); } else { @@ -395,7 +400,15 @@ assert(work_id < (unsigned int)_ref_processor.num_q(), "Index out-of-bounds"); // Simplest first cut: static partitioning. int index = work_id; - for (int j = 0; j < subclasses_of_ref; j++, index += _n_queues) { + // The increment on "index" must correspond to the maximum number of queues + // (n_queues) with which that ReferenceProcessor was created. That + // is because of the "clever" way the discovered references lists were + // allocated and are indexed into. That number is ParallelGCThreads + // currently. Assert that. + assert(_n_queues == (int) ParallelGCThreads, "Different number not expected"); + for (int j = 0; + j < subclasses_of_ref; + j++, index += _n_queues) { _ref_processor.enqueue_discovered_reflist( _refs_lists[index], _pending_list_addr); _refs_lists[index].set_head(_sentinel_ref); @@ -410,11 +423,11 @@ if (_processing_is_mt && task_executor != NULL) { // Parallel code RefProcEnqueueTask tsk(*this, _discoveredSoftRefs, - pending_list_addr, sentinel_ref(), _num_q); + pending_list_addr, sentinel_ref(), _max_num_q); task_executor->execute(tsk); } else { // Serial code: call the parent class's implementation - for (int i = 0; i < _num_q * subclasses_of_ref; i++) { + for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { enqueue_discovered_reflist(_discoveredSoftRefs[i], pending_list_addr); _discoveredSoftRefs[i].set_head(sentinel_ref()); _discoveredSoftRefs[i].set_length(0); @@ -614,8 +627,9 @@ complete_gc->do_void(); NOT_PRODUCT( if (PrintGCDetails && TraceReferenceGC) { - gclog_or_tty->print(" Dropped %d dead Refs out of %d " - "discovered Refs by policy ", iter.removed(), iter.processed()); + gclog_or_tty->print_cr(" Dropped %d dead Refs out of %d " + "discovered Refs by policy list " INTPTR_FORMAT, + iter.removed(), iter.processed(), (address)refs_list.head()); } ) } @@ -651,8 +665,9 @@ } NOT_PRODUCT( if (PrintGCDetails && TraceReferenceGC) { - gclog_or_tty->print(" Dropped %d active Refs out of %d " - "Refs in discovered list ", iter.removed(), iter.processed()); + gclog_or_tty->print_cr(" Dropped %d active Refs out of %d " + "Refs in discovered list " INTPTR_FORMAT, + iter.removed(), iter.processed(), (address)refs_list.head()); } ) } @@ -689,8 +704,9 @@ complete_gc->do_void(); NOT_PRODUCT( if (PrintGCDetails && TraceReferenceGC) { - gclog_or_tty->print(" Dropped %d active Refs out of %d " - "Refs in discovered list ", iter.removed(), iter.processed()); + gclog_or_tty->print_cr(" Dropped %d active Refs out of %d " + "Refs in discovered list " INTPTR_FORMAT, + iter.removed(), iter.processed(), (address)refs_list.head()); } ) } @@ -704,6 +720,7 @@ BoolObjectClosure* is_alive, OopClosure* keep_alive, VoidClosure* complete_gc) { + ResourceMark rm; DiscoveredListIterator iter(refs_list, keep_alive, is_alive); while (iter.has_next()) { iter.update_discovered(); @@ -743,8 +760,8 @@ void ReferenceProcessor::abandon_partial_discovery() { // loop over the lists - for (int i = 0; i < _num_q * subclasses_of_ref; i++) { - if (TraceReferenceGC && PrintGCDetails && ((i % _num_q) == 0)) { + for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { + if (TraceReferenceGC && PrintGCDetails && ((i % _max_num_q) == 0)) { gclog_or_tty->print_cr( "\nAbandoning %s discovered list", list_name(i)); @@ -766,7 +783,9 @@ OopClosure& keep_alive, VoidClosure& complete_gc) { - _ref_processor.process_phase1(_refs_lists[i], _policy, + Thread* thr = Thread::current(); + int refs_list_index = ((WorkerThread*)thr)->id(); + _ref_processor.process_phase1(_refs_lists[refs_list_index], _policy, &is_alive, &keep_alive, &complete_gc); } private: @@ -802,6 +821,11 @@ OopClosure& keep_alive, VoidClosure& complete_gc) { + // Don't use "refs_list_index" calculated in this way because + // balance_queues() has moved the Ref's into the first n queues. + // Thread* thr = Thread::current(); + // int refs_list_index = ((WorkerThread*)thr)->id(); + // _ref_processor.process_phase3(_refs_lists[refs_list_index], _clear_referent, _ref_processor.process_phase3(_refs_lists[i], _clear_referent, &is_alive, &keep_alive, &complete_gc); } @@ -810,23 +834,47 @@ }; // Balances reference queues. +// Move entries from all queues[0, 1, ..., _max_num_q-1] to +// queues[0, 1, ..., _num_q-1] because only the first _num_q +// corresponding to the active workers will be processed. void ReferenceProcessor::balance_queues(DiscoveredList ref_lists[]) { // calculate total length size_t total_refs = 0; - for (int i = 0; i < _num_q; ++i) { + if (TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print_cr("\nBalance ref_lists "); + } + + for (int i = 0; i < _max_num_q; ++i) { total_refs += ref_lists[i].length(); + if (TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print("%d ", ref_lists[i].length()); + } + } + if (TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print_cr(" = %d", total_refs); } size_t avg_refs = total_refs / _num_q + 1; int to_idx = 0; - for (int from_idx = 0; from_idx < _num_q; from_idx++) { - while (ref_lists[from_idx].length() > avg_refs) { + for (int from_idx = 0; from_idx < _max_num_q; from_idx++) { + bool move_all = false; + if (from_idx >= _num_q) { + move_all = ref_lists[from_idx].length() > 0; + } + while ((ref_lists[from_idx].length() > avg_refs) || + move_all) { assert(to_idx < _num_q, "Sanity Check!"); if (ref_lists[to_idx].length() < avg_refs) { // move superfluous refs - size_t refs_to_move = - MIN2(ref_lists[from_idx].length() - avg_refs, - avg_refs - ref_lists[to_idx].length()); + size_t refs_to_move; + // Move all the Ref's if the from queue will not be processed. + if (move_all) { + refs_to_move = MIN2(ref_lists[from_idx].length(), + avg_refs - ref_lists[to_idx].length()); + } else { + refs_to_move = MIN2(ref_lists[from_idx].length() - avg_refs, + avg_refs - ref_lists[to_idx].length()); + } oop move_head = ref_lists[from_idx].head(); oop move_tail = move_head; oop new_head = move_head; @@ -840,11 +888,35 @@ ref_lists[to_idx].inc_length(refs_to_move); ref_lists[from_idx].set_head(new_head); ref_lists[from_idx].dec_length(refs_to_move); + if (ref_lists[from_idx].length() == 0) { + break; + } } else { - ++to_idx; + to_idx = (to_idx + 1) % _num_q; } } } +#ifdef ASSERT + size_t balanced_total_refs = 0; + for (int i = 0; i < _max_num_q; ++i) { + balanced_total_refs += ref_lists[i].length(); + if (TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print("%d ", ref_lists[i].length()); + } + } + if (TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print_cr(" = %d", balanced_total_refs); + gclog_or_tty->flush(); + } + assert(total_refs == balanced_total_refs, "Balancing was incomplete"); +#endif +} + +void ReferenceProcessor::balance_all_queues() { + balance_queues(_discoveredSoftRefs); + balance_queues(_discoveredWeakRefs); + balance_queues(_discoveredFinalRefs); + balance_queues(_discoveredPhantomRefs); } void @@ -857,8 +929,17 @@ VoidClosure* complete_gc, AbstractRefProcTaskExecutor* task_executor) { - bool mt = task_executor != NULL && _processing_is_mt; - if (mt && ParallelRefProcBalancingEnabled) { + bool mt_processing = task_executor != NULL && _processing_is_mt; + // If discovery used MT and a dynamic number of GC threads, then + // the queues must be balanced for correctness if fewer than the + // maximum number of queues were used. The number of queue used + // during discovery may be different than the number to be used + // for processing so don't depend of _num_q < _max_num_q as part + // of the test. + bool must_balance = _discovery_is_mt; + + if ((mt_processing && ParallelRefProcBalancingEnabled) || + must_balance) { balance_queues(refs_lists); } if (PrintReferenceGC && PrintGCDetails) { @@ -875,7 +956,7 @@ // policy reasons. Keep alive the transitive closure of all // such referents. if (policy != NULL) { - if (mt) { + if (mt_processing) { RefProcPhase1Task phase1(*this, refs_lists, policy, true /*marks_oops_alive*/); task_executor->execute(phase1); } else { @@ -891,7 +972,7 @@ // Phase 2: // . Traverse the list and remove any refs whose referents are alive. - if (mt) { + if (mt_processing) { RefProcPhase2Task phase2(*this, refs_lists, !discovery_is_atomic() /*marks_oops_alive*/); task_executor->execute(phase2); } else { @@ -902,7 +983,7 @@ // Phase 3: // . Traverse the list and process referents as appropriate. - if (mt) { + if (mt_processing) { RefProcPhase3Task phase3(*this, refs_lists, clear_referent, true /*marks_oops_alive*/); task_executor->execute(phase3); } else { @@ -915,7 +996,11 @@ void ReferenceProcessor::clean_up_discovered_references() { // loop over the lists - for (int i = 0; i < _num_q * subclasses_of_ref; i++) { + // Should this instead be + // for (int i = 0; i < subclasses_of_ref; i++_ { + // for (int j = 0; j < _num_q; j++) { + // int index = i * _max_num_q + j; + for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { if (TraceReferenceGC && PrintGCDetails && ((i % _num_q) == 0)) { gclog_or_tty->print_cr( "\nScrubbing %s discovered list of Null referents", @@ -976,7 +1061,7 @@ id = next_id(); } } - assert(0 <= id && id < _num_q, "Id is out-of-bounds (call Freud?)"); + assert(0 <= id && id < _max_num_q, "Id is out-of-bounds (call Freud?)"); // Get the discovered queue to which we will add DiscoveredList* list = NULL; @@ -1001,6 +1086,10 @@ default: ShouldNotReachHere(); } + if (TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print_cr("Thread %d gets list " INTPTR_FORMAT, + id, list); + } return list; } @@ -1243,7 +1332,7 @@ { TraceTime tt("Preclean SoftReferences", PrintGCDetails && PrintReferenceGC, false, gclog_or_tty); - for (int i = 0; i < _num_q; i++) { + for (int i = 0; i < _max_num_q; i++) { if (yield->should_return()) { return; } @@ -1340,15 +1429,16 @@ NOT_PRODUCT( if (PrintGCDetails && PrintReferenceGC) { - gclog_or_tty->print(" Dropped %d Refs out of %d " - "Refs in discovered list ", iter.removed(), iter.processed()); + gclog_or_tty->print_cr(" Dropped %d Refs out of %d " + "Refs in discovered list " INTPTR_FORMAT, + iter.removed(), iter.processed(), (address)refs_list.head()); } ) } const char* ReferenceProcessor::list_name(int i) { - assert(i >= 0 && i <= _num_q * subclasses_of_ref, "Out of bounds index"); - int j = i / _num_q; + assert(i >= 0 && i <= _max_num_q * subclasses_of_ref, "Out of bounds index"); + int j = i / _max_num_q; switch (j) { case 0: return "SoftRef"; case 1: return "WeakRef"; @@ -1372,7 +1462,7 @@ #ifndef PRODUCT void ReferenceProcessor::clear_discovered_references() { guarantee(!_discovering_refs, "Discovering refs?"); - for (int i = 0; i < _num_q * subclasses_of_ref; i++) { + for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { oop obj = _discoveredSoftRefs[i].head(); while (obj != sentinel_ref()) { oop next = java_lang_ref_Reference::discovered(obj); diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/memory/referenceProcessor.hpp --- a/src/share/vm/memory/referenceProcessor.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/memory/referenceProcessor.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -85,8 +85,10 @@ // The discovered ref lists themselves - // The MT'ness degree of the queues below + // The active MT'ness degree of the queues below int _num_q; + // The maximum MT'ness degree of the queues below + int _max_num_q; // Arrays of lists of oops, one per thread DiscoveredList* _discoveredSoftRefs; DiscoveredList* _discoveredWeakRefs; @@ -95,6 +97,7 @@ public: int num_q() { return _num_q; } + void set_mt_degree(int v) { _num_q = v; } DiscoveredList* discovered_soft_refs() { return _discoveredSoftRefs; } static oop sentinel_ref() { return _sentinelRef; } static oop* adr_sentinel_ref() { return &_sentinelRef; } @@ -244,6 +247,7 @@ _bs(NULL), _is_alive_non_header(NULL), _num_q(0), + _max_num_q(0), _processing_is_mt(false), _next_id(0) {} @@ -312,6 +316,9 @@ void weak_oops_do(OopClosure* f); // weak roots static void oops_do(OopClosure* f); // strong root(s) + // Balance each of the discovered lists. + void balance_all_queues(); + // Discover a Reference object, using appropriate discovery criteria bool discover_reference(oop obj, ReferenceType rt); diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/memory/sharedHeap.cpp --- a/src/share/vm/memory/sharedHeap.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/memory/sharedHeap.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -50,7 +50,8 @@ _perm_gen(NULL), _rem_set(NULL), _strong_roots_parity(0), _process_strong_tasks(new SubTasksDone(SH_PS_NumElements)), - _workers(NULL), _n_par_threads(0) + _n_par_threads(0), + _workers(NULL) { if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) { vm_exit_during_initialization("Failed necessary allocation."); @@ -60,11 +61,13 @@ (UseConcMarkSweepGC && CMSParallelRemarkEnabled) || UseG1GC) && ParallelGCThreads > 0) { - _workers = new WorkGang("Parallel GC Threads", ParallelGCThreads, + _workers = new FlexibleWorkGang("Parallel GC Threads", ParallelGCThreads, /* are_GC_task_threads */true, /* are_ConcurrentGC_threads */false); if (_workers == NULL) { vm_exit_during_initialization("Failed necessary allocation."); + } else { + _workers->initialize_workers(); } } } @@ -77,8 +80,9 @@ } void SharedHeap::set_par_threads(int t) { + assert(t == 0 || !UseSerialGC, "Cannot have parallel threads"); _n_par_threads = t; - _process_strong_tasks->set_par_threads(t); + _process_strong_tasks->set_n_threads(t); } class AssertIsPermClosure: public OopClosure { diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/memory/sharedHeap.hpp --- a/src/share/vm/memory/sharedHeap.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/memory/sharedHeap.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -38,6 +38,7 @@ class ObjectClosure; class SubTasksDone; class WorkGang; +class FlexibleWorkGang; class CollectorPolicy; class KlassHandle; @@ -74,7 +75,7 @@ int _strong_roots_parity; // If we're doing parallel GC, use this gang of threads. - WorkGang* _workers; + FlexibleWorkGang* _workers; // Number of parallel threads currently working on GC tasks. // O indicates use sequential code; 1 means use parallel code even with @@ -189,7 +190,7 @@ SO_CodeCache = 0x10 }; - WorkGang* workers() const { return _workers; } + FlexibleWorkGang* workers() const { return _workers; } // Sets the number of parallel threads that will be doing tasks // (such as process strong roots) subsequently. diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/runtime/globals.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -273,6 +273,10 @@ // UnlockExperimentalVMOptions flag, which allows the control and // modification of the experimental flags. // +// Nota bene: neither diagnostic nor experimental options should be used casually, +// and they are not supported on production loads, except under explicit +// direction from support engineers. +// // manageable flags are writeable external product flags. // They are dynamically writeable through the JDK management interface // (com.sun.management.HotSpotDiagnosticMXBean API) and also through JConsole. @@ -634,6 +638,9 @@ develop(bool, ZapJNIHandleArea, trueInDebug, \ "Zap freed JNI handle space with 0xFEFEFEFE") \ \ + notproduct(bool, ZapStackSegments, trueInDebug, \ + "Zap allocated/freed Stack segments with 0xFADFADED") \ + \ develop(bool, ZapUnusedHeapArea, trueInDebug, \ "Zap unused heap space with 0xBAADBABE") \ \ @@ -1799,17 +1806,17 @@ develop(uintx, PromotionFailureALotInterval, 5, \ "Total collections between promotion failures alot") \ \ - develop(intx, WorkStealingSleepMillis, 1, \ + experimental(intx, WorkStealingSleepMillis, 1, \ "Sleep time when sleep is used for yields") \ \ - develop(uintx, WorkStealingYieldsBeforeSleep, 1000, \ + experimental(uintx, WorkStealingYieldsBeforeSleep, 1000, \ "Number of yields before a sleep is done during workstealing") \ \ - develop(uintx, WorkStealingHardSpins, 4096, \ + experimental(uintx, WorkStealingHardSpins, 4096, \ "Number of iterations in a spin loop between checks on " \ "time out of hard spin") \ \ - develop(uintx, WorkStealingSpinToYieldRatio, 10, \ + experimental(uintx, WorkStealingSpinToYieldRatio, 10, \ "Ratio of hard spins to calls to yield") \ \ product(uintx, PreserveMarkStackSize, 1024, \ diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/runtime/thread.cpp --- a/src/share/vm/runtime/thread.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/runtime/thread.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -1645,7 +1645,29 @@ satb_mark_queue().flush(); dirty_card_queue().flush(); } -#endif + +void JavaThread::initialize_queues() { + assert(!SafepointSynchronize::is_at_safepoint(), + "we should not be at a safepoint"); + + ObjPtrQueue& satb_queue = satb_mark_queue(); + SATBMarkQueueSet& satb_queue_set = satb_mark_queue_set(); + // The SATB queue should have been constructed with its active + // field set to false. + assert(!satb_queue.is_active(), "SATB queue should not be active"); + assert(satb_queue.is_empty(), "SATB queue should be empty"); + // If we are creating the thread during a marking cycle, we should + // set the active field of the SATB queue to true. + if (satb_queue_set.is_active()) { + satb_queue.set_active(true); + } + + DirtyCardQueue& dirty_queue = dirty_card_queue(); + // The dirty card queue should have been constructed with its + // active field set to true. + assert(dirty_queue.is_active(), "dirty card queue should be active"); +} +#endif // !SERIALGC void JavaThread::cleanup_failed_attach_current_thread() { if (get_thread_profiler() != NULL) { @@ -3627,6 +3649,10 @@ void Threads::add(JavaThread* p, bool force_daemon) { // The threads lock must be owned at this point assert_locked_or_safepoint(Threads_lock); + + // See the comment for this method in thread.hpp for its purpose and + // why it is called here. + p->initialize_queues(); p->set_next(_thread_list); _thread_list = p; _number_of_threads++; diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/runtime/thread.hpp --- a/src/share/vm/runtime/thread.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/runtime/thread.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -1490,6 +1490,29 @@ } #endif // !SERIALGC + // This method initializes the SATB and dirty card queues before a + // JavaThread is added to the Java thread list. Right now, we don't + // have to do anything to the dirty card queue (it should have been + // activated when the thread was created), but we have to activate + // the SATB queue if the thread is created while a marking cycle is + // in progress. The activation / de-activation of the SATB queues at + // the beginning / end of a marking cycle is done during safepoints + // so we have to make sure this method is called outside one to be + // able to safely read the active field of the SATB queue set. Right + // now, it is called just before the thread is added to the Java + // thread list in the Threads::add() method. That method is holding + // the Threads_lock which ensures we are outside a safepoint. We + // cannot do the obvious and set the active field of the SATB queue + // when the thread is created given that, in some cases, safepoints + // might happen between the JavaThread constructor being called and the + // thread being added to the Java thread list (an example of this is + // when the structure for the DestroyJavaVM thread is created). +#ifndef SERIALGC + void initialize_queues(); +#else // !SERIALGC + void initialize_queues() { } +#endif // !SERIALGC + // Machine dependent stuff #include "incls/_thread_pd.hpp.incl" diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/utilities/stack.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/utilities/stack.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -0,0 +1,204 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Class Stack (below) grows and shrinks by linking together "segments" which +// are allocated on demand. Segments are arrays of the element type (E) plus an +// extra pointer-sized field to store the segment link. Recently emptied +// segments are kept in a cache and reused. +// +// Notes/caveats: +// +// The size of an element must either evenly divide the size of a pointer or be +// a multiple of the size of a pointer. +// +// Destructors are not called for elements popped off the stack, so element +// types which rely on destructors for things like reference counting will not +// work properly. +// +// Class Stack allocates segments from the C heap. However, two protected +// virtual methods are used to alloc/free memory which subclasses can override: +// +// virtual void* alloc(size_t bytes); +// virtual void free(void* addr, size_t bytes); +// +// The alloc() method must return storage aligned for any use. The +// implementation in class Stack assumes that alloc() will terminate the process +// if the allocation fails. + +template class StackIterator; + +// StackBase holds common data/methods that don't depend on the element type, +// factored out to reduce template code duplication. +class StackBase +{ +public: + size_t segment_size() const { return _seg_size; } // Elements per segment. + size_t max_size() const { return _max_size; } // Max elements allowed. + size_t max_cache_size() const { return _max_cache_size; } // Max segments + // allowed in cache. + + size_t cache_size() const { return _cache_size; } // Segments in the cache. + +protected: + // The ctor arguments correspond to the like-named functions above. + // segment_size: number of items per segment + // max_cache_size: maxmium number of *segments* to cache + // max_size: maximum number of items allowed, rounded to a multiple of + // the segment size (0 == unlimited) + inline StackBase(size_t segment_size, size_t max_cache_size, size_t max_size); + + // Round max_size to a multiple of the segment size. Treat 0 as unlimited. + static inline size_t adjust_max_size(size_t max_size, size_t seg_size); + +protected: + const size_t _seg_size; // Number of items per segment. + const size_t _max_size; // Maximum number of items allowed in the stack. + const size_t _max_cache_size; // Maximum number of segments to cache. + size_t _cur_seg_size; // Number of items in the current segment. + size_t _full_seg_size; // Number of items in already-filled segments. + size_t _cache_size; // Number of segments in the cache. +}; + +#ifdef __GNUC__ +#define inline +#endif // __GNUC__ + +template +class Stack: public StackBase +{ +public: + friend class StackIterator; + + // segment_size: number of items per segment + // max_cache_size: maxmium number of *segments* to cache + // max_size: maximum number of items allowed, rounded to a multiple of + // the segment size (0 == unlimited) + inline Stack(size_t segment_size = default_segment_size(), + size_t max_cache_size = 4, size_t max_size = 0); + inline ~Stack() { clear(true); } + + inline bool is_empty() const { return _cur_seg == NULL; } + inline bool is_full() const { return _full_seg_size >= max_size(); } + + // Performance sensitive code should use is_empty() instead of size() == 0 and + // is_full() instead of size() == max_size(). Using a conditional here allows + // just one var to be updated when pushing/popping elements instead of two; + // _full_seg_size is updated only when pushing/popping segments. + inline size_t size() const { + return is_empty() ? 0 : _full_seg_size + _cur_seg_size; + } + + inline void push(E elem); + inline E pop(); + + // Clear everything from the stack, releasing the associated memory. If + // clear_cache is true, also release any cached segments. + void clear(bool clear_cache = false); + + static inline size_t default_segment_size(); + +protected: + // Each segment includes space for _seg_size elements followed by a link + // (pointer) to the previous segment; the space is allocated as a single block + // of size segment_bytes(). _seg_size is rounded up if necessary so the link + // is properly aligned. The C struct for the layout would be: + // + // struct segment { + // E elements[_seg_size]; + // E* link; + // }; + + // Round up seg_size to keep the link field aligned. + static inline size_t adjust_segment_size(size_t seg_size); + + // Methods for allocation size and getting/setting the link. + inline size_t link_offset() const; // Byte offset of link field. + inline size_t segment_bytes() const; // Segment size in bytes. + inline E** link_addr(E* seg) const; // Address of the link field. + inline E* get_link(E* seg) const; // Extract the link from seg. + inline E* set_link(E* new_seg, E* old_seg); // new_seg.link = old_seg. + + virtual E* alloc(size_t bytes); + virtual void free(E* addr, size_t bytes); + + void push_segment(); + void pop_segment(); + + void free_segments(E* seg); // Free all segments in the list. + inline void reset(bool reset_cache); // Reset all data fields. + + DEBUG_ONLY(void verify(bool at_empty_transition) const;) + DEBUG_ONLY(void zap_segment(E* seg, bool zap_link_field) const;) + +private: + E* _cur_seg; // Current segment. + E* _cache; // Segment cache to avoid ping-ponging. +}; + +template class ResourceStack: public Stack, public ResourceObj +{ +public: + // If this class becomes widely used, it may make sense to save the Thread + // and use it when allocating segments. + ResourceStack(size_t segment_size = Stack::default_segment_size()): + Stack(segment_size, max_uintx) + { } + + // Set the segment pointers to NULL so the parent dtor does not free them; + // that must be done by the ResourceMark code. + ~ResourceStack() { Stack::reset(true); } + +protected: + virtual E* alloc(size_t bytes); + virtual void free(E* addr, size_t bytes); + +private: + void clear(bool clear_cache = false); +}; + +template +class StackIterator: public StackObj +{ +public: + StackIterator(Stack& stack): _stack(stack) { sync(); } + + Stack& stack() const { return _stack; } + + bool is_empty() const { return _cur_seg == NULL; } + + E next() { return *next_addr(); } + E* next_addr(); + + void sync(); // Sync the iterator's state to the stack's current state. + +private: + Stack& _stack; + size_t _cur_seg_size; + E* _cur_seg; + size_t _full_seg_size; +}; + +#ifdef __GNUC__ +#undef inline +#endif // __GNUC__ diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/utilities/stack.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/utilities/stack.inline.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -0,0 +1,273 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +StackBase::StackBase(size_t segment_size, size_t max_cache_size, + size_t max_size): + _seg_size(segment_size), + _max_cache_size(max_cache_size), + _max_size(adjust_max_size(max_size, segment_size)) +{ + assert(_max_size % _seg_size == 0, "not a multiple"); +} + +size_t StackBase::adjust_max_size(size_t max_size, size_t seg_size) +{ + assert(seg_size > 0, "cannot be 0"); + assert(max_size >= seg_size || max_size == 0, "max_size too small"); + const size_t limit = max_uintx - (seg_size - 1); + if (max_size == 0 || max_size > limit) { + max_size = limit; + } + return (max_size + seg_size - 1) / seg_size * seg_size; +} + +template +Stack::Stack(size_t segment_size, size_t max_cache_size, size_t max_size): + StackBase(adjust_segment_size(segment_size), max_cache_size, max_size) +{ + reset(true); +} + +template +void Stack::push(E item) +{ + assert(!is_full(), "pushing onto a full stack"); + if (_cur_seg_size == _seg_size) { + push_segment(); + } + _cur_seg[_cur_seg_size] = item; + ++_cur_seg_size; +} + +template +E Stack::pop() +{ + assert(!is_empty(), "popping from an empty stack"); + if (_cur_seg_size == 1) { + E tmp = _cur_seg[--_cur_seg_size]; + pop_segment(); + return tmp; + } + return _cur_seg[--_cur_seg_size]; +} + +template +void Stack::clear(bool clear_cache) +{ + free_segments(_cur_seg); + if (clear_cache) free_segments(_cache); + reset(clear_cache); +} + +template +size_t Stack::default_segment_size() +{ + // Number of elements that fit in 4K bytes minus the size of two pointers + // (link field and malloc header). + return (4096 - 2 * sizeof(E*)) / sizeof(E); +} + +template +size_t Stack::adjust_segment_size(size_t seg_size) +{ + const size_t elem_sz = sizeof(E); + const size_t ptr_sz = sizeof(E*); + assert(elem_sz % ptr_sz == 0 || ptr_sz % elem_sz == 0, "bad element size"); + if (elem_sz < ptr_sz) { + return align_size_up(seg_size * elem_sz, ptr_sz) / elem_sz; + } + return seg_size; +} + +template +size_t Stack::link_offset() const +{ + return align_size_up(_seg_size * sizeof(E), sizeof(E*)); +} + +template +size_t Stack::segment_bytes() const +{ + return link_offset() + sizeof(E*); +} + +template +E** Stack::link_addr(E* seg) const +{ + return (E**) ((char*)seg + link_offset()); +} + +template +E* Stack::get_link(E* seg) const +{ + return *link_addr(seg); +} + +template +E* Stack::set_link(E* new_seg, E* old_seg) +{ + *link_addr(new_seg) = old_seg; + return new_seg; +} + +template +E* Stack::alloc(size_t bytes) +{ + return (E*) NEW_C_HEAP_ARRAY(char, bytes); +} + +template +void Stack::free(E* addr, size_t bytes) +{ + FREE_C_HEAP_ARRAY(char, (char*) addr); +} + +template +void Stack::push_segment() +{ + assert(_cur_seg_size == _seg_size, "current segment is not full"); + E* next; + if (_cache_size > 0) { + // Use a cached segment. + next = _cache; + _cache = get_link(_cache); + --_cache_size; + } else { + next = alloc(segment_bytes()); + DEBUG_ONLY(zap_segment(next, true);) + } + const bool at_empty_transition = is_empty(); + _cur_seg = set_link(next, _cur_seg); + _cur_seg_size = 0; + _full_seg_size += at_empty_transition ? 0 : _seg_size; + DEBUG_ONLY(verify(at_empty_transition);) +} + +template +void Stack::pop_segment() +{ + assert(_cur_seg_size == 0, "current segment is not empty"); + E* const prev = get_link(_cur_seg); + if (_cache_size < _max_cache_size) { + // Add the current segment to the cache. + DEBUG_ONLY(zap_segment(_cur_seg, false);) + _cache = set_link(_cur_seg, _cache); + ++_cache_size; + } else { + DEBUG_ONLY(zap_segment(_cur_seg, true);) + free(_cur_seg, segment_bytes()); + } + const bool at_empty_transition = prev == NULL; + _cur_seg = prev; + _cur_seg_size = _seg_size; + _full_seg_size -= at_empty_transition ? 0 : _seg_size; + DEBUG_ONLY(verify(at_empty_transition);) +} + +template +void Stack::free_segments(E* seg) +{ + const size_t bytes = segment_bytes(); + while (seg != NULL) { + E* const prev = get_link(seg); + free(seg, bytes); + seg = prev; + } +} + +template +void Stack::reset(bool reset_cache) +{ + _cur_seg_size = _seg_size; // So push() will alloc a new segment. + _full_seg_size = 0; + _cur_seg = NULL; + if (reset_cache) { + _cache_size = 0; + _cache = NULL; + } +} + +#ifdef ASSERT +template +void Stack::verify(bool at_empty_transition) const +{ + assert(size() <= max_size(), "stack exceeded bounds"); + assert(cache_size() <= max_cache_size(), "cache exceeded bounds"); + assert(_cur_seg_size <= segment_size(), "segment index exceeded bounds"); + + assert(_full_seg_size % _seg_size == 0, "not a multiple"); + assert(at_empty_transition || is_empty() == (size() == 0), "mismatch"); + assert((_cache == NULL) == (cache_size() == 0), "mismatch"); + + if (is_empty()) { + assert(_cur_seg_size == segment_size(), "sanity"); + } +} + +template +void Stack::zap_segment(E* seg, bool zap_link_field) const +{ + if (!ZapStackSegments) return; + const size_t zap_bytes = segment_bytes() - (zap_link_field ? 0 : sizeof(E*)); + uint32_t* cur = (uint32_t*)seg; + const uint32_t* end = cur + zap_bytes / sizeof(uint32_t); + while (cur < end) { + *cur++ = 0xfadfaded; + } +} +#endif + +template +E* ResourceStack::alloc(size_t bytes) +{ + return (E*) resource_allocate_bytes(bytes); +} + +template +void ResourceStack::free(E* addr, size_t bytes) +{ + resource_free_bytes((char*) addr, bytes); +} + +template +void StackIterator::sync() +{ + _full_seg_size = _stack._full_seg_size; + _cur_seg_size = _stack._cur_seg_size; + _cur_seg = _stack._cur_seg; +} + +template +E* StackIterator::next_addr() +{ + assert(!is_empty(), "no items left"); + if (_cur_seg_size == 1) { + E* addr = _cur_seg; + _cur_seg = _stack.get_link(_cur_seg); + _cur_seg_size = _stack.segment_size(); + _full_seg_size -= _stack.segment_size(); + return addr; + } + return _cur_seg + --_cur_seg_size; +} diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/utilities/taskqueue.cpp --- a/src/share/vm/utilities/taskqueue.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/utilities/taskqueue.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -144,6 +144,7 @@ bool ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) { + assert(_n_threads > 0, "Initialization is incorrect"); assert(_offered_termination < _n_threads, "Invariant"); Atomic::inc(&_offered_termination); @@ -255,3 +256,9 @@ _index < objArrayOop(_obj)->length(); } #endif // ASSERT + +void ParallelTaskTerminator::reset_for_reuse(int n_threads) { + reset_for_reuse(); + _n_threads = n_threads; +} + diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/utilities/taskqueue.hpp --- a/src/share/vm/utilities/taskqueue.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/utilities/taskqueue.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -305,6 +305,12 @@ return false; } +// pop_local_slow() is done by the owning thread and is trying to +// get the last task in the queue. It will compete with pop_global() +// that will be used by other threads. The tag age is incremented +// whenever the queue goes empty which it will do here if this thread +// gets the last task or in pop_global() if the queue wraps (top == 0 +// and pop_global() succeeds, see pop_global()). template bool GenericTaskQueue::pop_local_slow(uint localBot, Age oldAge) { // This queue was observed to contain exactly one element; either this @@ -366,75 +372,47 @@ // OverflowTaskQueue is a TaskQueue that also includes an overflow stack for // elements that do not fit in the TaskQueue. // -// Three methods from super classes are overridden: +// This class hides two methods from super classes: // -// initialize() - initialize the super classes and create the overflow stack // push() - push onto the task queue or, if that fails, onto the overflow stack // is_empty() - return true if both the TaskQueue and overflow stack are empty // -// Note that size() is not overridden--it returns the number of elements in the +// Note that size() is not hidden--it returns the number of elements in the // TaskQueue, and does not include the size of the overflow stack. This // simplifies replacement of GenericTaskQueues with OverflowTaskQueues. template class OverflowTaskQueue: public GenericTaskQueue { public: - typedef GrowableArray overflow_t; + typedef Stack overflow_t; typedef GenericTaskQueue taskqueue_t; TASKQUEUE_STATS_ONLY(using taskqueue_t::stats;) - OverflowTaskQueue(); - ~OverflowTaskQueue(); - void initialize(); - - inline overflow_t* overflow_stack() const { return _overflow_stack; } - // Push task t onto the queue or onto the overflow stack. Return true. inline bool push(E t); // Attempt to pop from the overflow stack; return true if anything was popped. inline bool pop_overflow(E& t); + inline overflow_t* overflow_stack() { return &_overflow_stack; } + inline bool taskqueue_empty() const { return taskqueue_t::is_empty(); } - inline bool overflow_empty() const { return overflow_stack()->is_empty(); } + inline bool overflow_empty() const { return _overflow_stack.is_empty(); } inline bool is_empty() const { return taskqueue_empty() && overflow_empty(); } private: - overflow_t* _overflow_stack; + overflow_t _overflow_stack; }; template -OverflowTaskQueue::OverflowTaskQueue() -{ - _overflow_stack = NULL; -} - -template -OverflowTaskQueue::~OverflowTaskQueue() -{ - if (_overflow_stack != NULL) { - delete _overflow_stack; - _overflow_stack = NULL; - } -} - -template -void OverflowTaskQueue::initialize() -{ - taskqueue_t::initialize(); - assert(_overflow_stack == NULL, "memory leak"); - _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray(10, true); -} - -template bool OverflowTaskQueue::push(E t) { if (!taskqueue_t::push(t)) { overflow_stack()->push(t); - TASKQUEUE_STATS_ONLY(stats.record_overflow(overflow_stack()->length())); + TASKQUEUE_STATS_ONLY(stats.record_overflow(overflow_stack()->size())); } return true; } @@ -637,6 +615,9 @@ // in an MT-safe manner, once the previous round of use of // the terminator is finished. void reset_for_reuse(); + // Same as above but the number of parallel threads is set to the + // given number. + void reset_for_reuse(int n_threads); #ifdef TRACESPINNING static uint total_yields() { return _total_yields; } @@ -782,3 +763,4 @@ typedef OverflowTaskQueue RegionTaskQueue; typedef GenericTaskQueueSet RegionTaskQueueSet; + diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/utilities/workgroup.cpp --- a/src/share/vm/utilities/workgroup.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/utilities/workgroup.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -53,28 +53,52 @@ int workers, bool are_GC_task_threads, bool are_ConcurrentGC_threads) : - AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads) -{ + AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads) { // Save arguments. _total_workers = workers; +} + +GangWorker* WorkGang::allocate_worker(int which) { + GangWorker* new_worker = new GangWorker(this, which); + return new_worker; +} + +// The current implementation will exit if the allocation +// of any worker fails. Still, return a boolean so that +// a future implementation can possibly do a partial +// initialization of the workers and report such to the +// caller. +bool WorkGang::initialize_workers() { if (TraceWorkGang) { - tty->print_cr("Constructing work gang %s with %d threads", name, workers); + tty->print_cr("Constructing work gang %s with %d threads", + name(), + total_workers()); } - _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, workers); + _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, total_workers()); if (gang_workers() == NULL) { vm_exit_out_of_memory(0, "Cannot create GangWorker array."); + return false; + } + os::ThreadType worker_type; + if (are_ConcurrentGC_threads()) { + worker_type = os::cgc_thread; + } else { + worker_type = os::pgc_thread; } for (int worker = 0; worker < total_workers(); worker += 1) { - GangWorker* new_worker = new GangWorker(this, worker); + GangWorker* new_worker = allocate_worker(worker); assert(new_worker != NULL, "Failed to allocate GangWorker"); _gang_workers[worker] = new_worker; - if (new_worker == NULL || !os::create_thread(new_worker, os::pgc_thread)) + if (new_worker == NULL || !os::create_thread(new_worker, worker_type)) { vm_exit_out_of_memory(0, "Cannot create worker GC thread. Out of system resources."); + return false; + } if (!DisableStartThread) { os::start_thread(new_worker); } } + return true; } AbstractWorkGang::~AbstractWorkGang() { @@ -383,7 +407,7 @@ return _tasks != NULL; } -void SubTasksDone::set_par_threads(int t) { +void SubTasksDone::set_n_threads(int t) { #ifdef ASSERT assert(_claimed == 0 || _threads_completed == _n_threads, "should not be called while tasks are being processed!"); diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/utilities/workgroup.hpp --- a/src/share/vm/utilities/workgroup.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/utilities/workgroup.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,6 +29,7 @@ class YieldingFlexibleGangWorker; class YieldingFlexibleGangTask; class WorkData; +class AbstractWorkGang; // An abstract task to be worked on by a gang. // You subclass this to supply your own work() method @@ -38,6 +39,13 @@ // The argument tells you which member of the gang you are. virtual void work(int i) = 0; + // This method configures the task for proper termination. + // Some tasks do not have any requirements on termination + // and may inherit this method that does nothing. Some + // tasks do some coordination on termination and override + // this method to implement that coordination. + virtual void set_for_termination(int active_workers) {}; + // Debugging accessor for the name. const char* name() const PRODUCT_RETURN_(return NULL;); int counter() { return _counter; } @@ -64,6 +72,18 @@ virtual ~AbstractGangTask() { } }; +class AbstractGangTaskWOopQueues : public AbstractGangTask { + OopTaskQueueSet* _queues; + ParallelTaskTerminator _terminator; + public: + AbstractGangTaskWOopQueues(const char* name, OopTaskQueueSet* queues) : + AbstractGangTask(name), _queues(queues), _terminator(0, _queues) {} + ParallelTaskTerminator* terminator() { return &_terminator; } + virtual void set_for_termination(int active_workers) { + terminator()->reset_for_reuse(active_workers); + } + OopTaskQueueSet* queues() { return _queues; } +}; // Class AbstractWorkGang: // An abstract class representing a gang of workers. @@ -114,6 +134,9 @@ int total_workers() const { return _total_workers; } + virtual int active_workers() const { + return _total_workers; + } bool terminate() const { return _terminate; } @@ -199,6 +222,13 @@ bool are_GC_task_threads, bool are_ConcurrentGC_threads); // Run a task, returns when the task is done (or terminated). virtual void run_task(AbstractGangTask* task); + void run_task(AbstractGangTask* task, uint no_of_parallel_workers); + // Allocate a worker and return a pointer to it. + virtual GangWorker* allocate_worker(int which); + // Initialize workers in the gang. Return true if initialization + // succeeded. The type of the worker can be overridden in a derived + // class with the appropriate implementation of allocate_worker(). + bool initialize_workers(); }; // Class GangWorker: @@ -226,6 +256,34 @@ AbstractWorkGang* gang() const { return _gang; } }; +class FlexibleWorkGang: public WorkGang { + protected: + int _active_workers; + public: + // Constructor and destructor. + FlexibleWorkGang(const char* name, int workers, + bool are_GC_task_threads, + bool are_ConcurrentGC_threads) : + WorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads) { + _active_workers = ParallelGCThreads; + }; + // Accessors for fields + virtual int active_workers() const { return _active_workers; } + void set_active_workers(int v) { _active_workers = v; } +}; + +// Work gangs in garbage collectors: 2009-06-10 +// +// SharedHeap - work gang for stop-the-world parallel collection. +// Used by +// ParNewGeneration +// CMSParRemarkTask +// CMSRefProcTaskExecutor +// G1CollectedHeap +// G1ParFinalCountTask +// ConcurrentMark +// CMSCollector + // A class that acts as a synchronisation barrier. Workers enter // the barrier and must wait until all other workers have entered // before any of them may leave. @@ -271,7 +329,7 @@ int _n_threads; jint _threads_completed; #ifdef ASSERT - jint _claimed; + volatile jint _claimed; #endif // Set all tasks to unclaimed. @@ -286,9 +344,10 @@ // True iff the object is in a valid state. bool valid(); - // Set the number of parallel threads doing the tasks to "t". Can only + // Get/set the number of parallel threads doing the tasks to "t". Can only // be called before tasks start or after they are complete. - void set_par_threads(int t); + int n_threads() { return _n_threads; } + void set_n_threads(int t); // Returns "false" if the task "t" is unclaimed, and ensures that task is // claimed. The task "t" is required to be within the range of "this". @@ -315,13 +374,17 @@ protected: jint _n_tasks; // Total number of tasks available. jint _n_claimed; // Number of tasks claimed. + // _n_threads is used to determine when a sub task is done. + // See comments on SubTasksDone::_n_threads jint _n_threads; // Total number of parallel threads. jint _n_completed; // Number of completed threads. void clear(); public: - SequentialSubTasksDone() { clear(); } + SequentialSubTasksDone() { + clear(); + } ~SequentialSubTasksDone() {} // True iff the object is in a valid state. @@ -330,11 +393,12 @@ // number of tasks jint n_tasks() const { return _n_tasks; } - // Set the number of parallel threads doing the tasks to t. + // Get/set the number of parallel threads doing the tasks to t. // Should be called before the task starts but it is safe // to call this once a task is running provided that all // threads agree on the number of threads. - void set_par_threads(int t) { _n_threads = t; } + int n_threads() { return _n_threads; } + void set_n_threads(int t) { _n_threads = t; } // Set the number of tasks to be claimed to t. As above, // should be called before the tasks start but it is safe diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/utilities/yieldingWorkgroup.cpp --- a/src/share/vm/utilities/yieldingWorkgroup.cpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/utilities/yieldingWorkgroup.cpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2010 Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,29 +32,13 @@ YieldingFlexibleWorkGang::YieldingFlexibleWorkGang( const char* name, int workers, bool are_GC_task_threads) : - AbstractWorkGang(name, are_GC_task_threads, false) { - // Save arguments. - _total_workers = workers; - assert(_total_workers > 0, "Must have more than 1 worker"); - - _yielded_workers = 0; + FlexibleWorkGang(name, workers, are_GC_task_threads, false), + _yielded_workers(0) {} - if (TraceWorkGang) { - tty->print_cr("Constructing work gang %s with %d threads", name, workers); - } - _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, workers); - assert(gang_workers() != NULL, "Failed to allocate gang workers"); - for (int worker = 0; worker < total_workers(); worker += 1) { - YieldingFlexibleGangWorker* new_worker = - new YieldingFlexibleGangWorker(this, worker); - assert(new_worker != NULL, "Failed to allocate YieldingFlexibleGangWorker"); - _gang_workers[worker] = new_worker; - if (new_worker == NULL || !os::create_thread(new_worker, os::pgc_thread)) - vm_exit_out_of_memory(0, "Cannot create worker GC thread. Out of system resources."); - if (!DisableStartThread) { - os::start_thread(new_worker); - } - } +GangWorker* YieldingFlexibleWorkGang::allocate_worker(int which) { + YieldingFlexibleGangWorker* new_member = + new YieldingFlexibleGangWorker(this, which); + return (YieldingFlexibleGangWorker*) new_member; } // Run a task; returns when the task is done, or the workers yield, @@ -142,6 +126,7 @@ _active_workers = total_workers(); } new_task->set_actual_size(_active_workers); + new_task->set_for_termination(_active_workers); assert(_started_workers == 0, "Tabula rasa non"); assert(_finished_workers == 0, "Tabula rasa non"); @@ -161,22 +146,22 @@ for (Status status = yielding_task()->status(); status != COMPLETED && status != YIELDED && status != ABORTED; status = yielding_task()->status()) { - assert(started_workers() <= active_workers(), "invariant"); - assert(finished_workers() <= active_workers(), "invariant"); - assert(yielded_workers() <= active_workers(), "invariant"); + assert(started_workers() <= total_workers(), "invariant"); + assert(finished_workers() <= total_workers(), "invariant"); + assert(yielded_workers() <= total_workers(), "invariant"); monitor()->wait(Mutex::_no_safepoint_check_flag); } switch (yielding_task()->status()) { case COMPLETED: case ABORTED: { - assert(finished_workers() == active_workers(), "Inconsistent status"); + assert(finished_workers() == total_workers(), "Inconsistent status"); assert(yielded_workers() == 0, "Invariant"); reset(); // for next task; gang<->task binding released break; } case YIELDED: { assert(yielded_workers() > 0, "Invariant"); - assert(yielded_workers() + finished_workers() == active_workers(), + assert(yielded_workers() + finished_workers() == total_workers(), "Inconsistent counts"); break; } @@ -208,7 +193,6 @@ void YieldingFlexibleWorkGang::reset() { _started_workers = 0; _finished_workers = 0; - _active_workers = 0; yielding_task()->set_gang(NULL); _task = NULL; // unbind gang from task } @@ -216,7 +200,7 @@ void YieldingFlexibleWorkGang::yield() { assert(task() != NULL, "Inconsistency; should have task binding"); MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag); - assert(yielded_workers() < active_workers(), "Consistency check"); + assert(yielded_workers() < total_workers(), "Consistency check"); if (yielding_task()->status() == ABORTING) { // Do not yield; we need to abort as soon as possible // XXX NOTE: This can cause a performance pathology in the @@ -227,7 +211,7 @@ // us to return at each potential yield point. return; } - if (++_yielded_workers + finished_workers() == active_workers()) { + if (++_yielded_workers + finished_workers() == total_workers()) { yielding_task()->set_status(YIELDED); monitor()->notify_all(); } else { diff -r 22e4420d19f7 -r 0715f0cf171d src/share/vm/utilities/yieldingWorkgroup.hpp --- a/src/share/vm/utilities/yieldingWorkgroup.hpp Wed Oct 06 14:18:32 2010 -0700 +++ b/src/share/vm/utilities/yieldingWorkgroup.hpp Fri Oct 08 09:29:09 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -54,6 +54,25 @@ virtual void loop(); }; +class FlexibleGangTask: public AbstractGangTask { + int _actual_size; // size of gang obtained +protected: + int _requested_size; // size of gang requested +public: + FlexibleGangTask(const char* name): AbstractGangTask(name), + _requested_size(0) {} + + // The abstract work method. + // The argument tells you which member of the gang you are. + virtual void work(int i) = 0; + + int requested_size() const { return _requested_size; } + int actual_size() const { return _actual_size; } + + void set_requested_size(int sz) { _requested_size = sz; } + void set_actual_size(int sz) { _actual_size = sz; } +}; + // An abstract task to be worked on by a flexible work gang, // and where the workers will periodically yield, usually // in response to some condition that is signalled by means @@ -70,19 +89,15 @@ // maximum) in response to task requests at certain points. // The last part (the flexible part) has not yet been fully // fleshed out and is a work in progress. -class YieldingFlexibleGangTask: public AbstractGangTask { +class YieldingFlexibleGangTask: public FlexibleGangTask { Status _status; YieldingFlexibleWorkGang* _gang; - int _actual_size; // size of gang obtained protected: - int _requested_size; // size of gang requested - // Constructor and desctructor: only construct subclasses. - YieldingFlexibleGangTask(const char* name): AbstractGangTask(name), + YieldingFlexibleGangTask(const char* name): FlexibleGangTask(name), _status(INACTIVE), - _gang(NULL), - _requested_size(0) { } + _gang(NULL) { } virtual ~YieldingFlexibleGangTask() { } @@ -122,24 +137,18 @@ virtual void abort(); Status status() const { return _status; } + bool yielding() const { return _status == YIELDING; } bool yielded() const { return _status == YIELDED; } bool completed() const { return _status == COMPLETED; } bool aborted() const { return _status == ABORTED; } bool active() const { return _status == ACTIVE; } - - int requested_size() const { return _requested_size; } - int actual_size() const { return _actual_size; } - - void set_requested_size(int sz) { _requested_size = sz; } - void set_actual_size(int sz) { _actual_size = sz; } }; - // Class YieldingWorkGang: A subclass of WorkGang. // In particular, a YieldingWorkGang is made up of // YieldingGangWorkers, and provides infrastructure // supporting yielding to the "GangOverseer", // being the thread that orchestrates the WorkGang via run_task(). -class YieldingFlexibleWorkGang: public AbstractWorkGang { +class YieldingFlexibleWorkGang: public FlexibleWorkGang { // Here's the public interface to this class. public: // Constructor and destructor. @@ -151,6 +160,9 @@ "Incorrect cast"); return (YieldingFlexibleGangTask*)task(); } + // Allocate a worker and return a pointer to it. + GangWorker* allocate_worker(int which); + // Run a task; returns when the task is done, or the workers yield, // or the task is aborted, or the work gang is terminated via stop(). // A task that has been yielded can be continued via this same interface @@ -180,10 +192,6 @@ void abort(); private: - // The currently active workers in this gang. - // This is a number that is dynamically adjusted by - // the run_task() method at each subsequent invocation, - // using data in the YieldingFlexibleGangTask. int _active_workers; int _yielded_workers; void wait_for_gang(); @@ -194,6 +202,7 @@ return _active_workers; } + // Accessors for fields int yielded_workers() const { return _yielded_workers; }