# HG changeset patch # User ysr # Date 1227212861 28800 # Node ID 00b023ae2d78987955a9647b6d6d457bd39dec7e # Parent b5e603f2e02480180ace404141897cf8f6266e93 6722113: CMS: Incorrect overflow handling during precleaning of Reference lists Summary: When we encounter marking stack overflow during precleaning of Reference lists, we were using the overflow list mechanism, which can cause problems on account of mutating the mark word of the header because of conflicts with mutator accesses and updates of that field. Instead we should use the usual mechanism for overflow handling in concurrent phases, namely dirtying of the card on which the overflowed object lies. Since precleaning effectively does a form of discovered list processing, albeit with discovery enabled, we needed to adjust some code to be correct in the face of interleaved processing and discovery. Reviewed-by: apetrusenko, jcoomes diff -r b5e603f2e024 -r 00b023ae2d78 src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.hpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.hpp Wed Nov 19 14:20:51 2008 -0800 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.hpp Thu Nov 20 12:27:41 2008 -0800 @@ -325,24 +325,30 @@ // For objects in CMS generation, this closure marks // given objects (transitively) as being reachable/live. // This is currently used during the (weak) reference object -// processing phase of the CMS final checkpoint step. +// processing phase of the CMS final checkpoint step, as +// well as during the concurrent precleaning of the discovered +// reference lists. class CMSKeepAliveClosure: public OopClosure { private: CMSCollector* _collector; const MemRegion _span; CMSMarkStack* _mark_stack; CMSBitMap* _bit_map; + bool _concurrent_precleaning; protected: DO_OOP_WORK_DEFN public: CMSKeepAliveClosure(CMSCollector* collector, MemRegion span, - CMSBitMap* bit_map, CMSMarkStack* mark_stack): + CMSBitMap* bit_map, CMSMarkStack* mark_stack, + bool cpc): _collector(collector), _span(span), _bit_map(bit_map), - _mark_stack(mark_stack) { + _mark_stack(mark_stack), + _concurrent_precleaning(cpc) { assert(!_span.is_empty(), "Empty span could spell trouble"); } + bool concurrent_precleaning() const { return _concurrent_precleaning; } virtual void do_oop(oop* p); virtual void do_oop(narrowOop* p); inline void do_oop_nv(oop* p) { CMSKeepAliveClosure::do_oop_work(p); } diff -r b5e603f2e024 -r 00b023ae2d78 src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Wed Nov 19 14:20:51 2008 -0800 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Thu Nov 20 12:27:41 2008 -0800 @@ -538,6 +538,7 @@ _survivor_chunk_capacity(0), // -- ditto -- _survivor_chunk_index(0), // -- ditto -- _ser_pmc_preclean_ovflw(0), + _ser_kac_preclean_ovflw(0), _ser_pmc_remark_ovflw(0), _par_pmc_remark_ovflw(0), _ser_kac_ovflw(0), @@ -4388,10 +4389,10 @@ CMSPrecleanRefsYieldClosure yield_cl(this); assert(rp->span().equals(_span), "Spans should be equal"); CMSKeepAliveClosure keep_alive(this, _span, &_markBitMap, - &_markStack); + &_markStack, true /* preclean */); CMSDrainMarkingStackClosure complete_trace(this, - _span, &_markBitMap, &_markStack, - &keep_alive); + _span, &_markBitMap, &_markStack, + &keep_alive, true /* preclean */); // We don't want this step to interfere with a young // collection because we don't want to take CPU @@ -4852,17 +4853,19 @@ // recurrence of that condition. assert(_markStack.isEmpty(), "No grey objects"); size_t ser_ovflw = _ser_pmc_remark_ovflw + _ser_pmc_preclean_ovflw + - _ser_kac_ovflw; + _ser_kac_ovflw + _ser_kac_preclean_ovflw; if (ser_ovflw > 0) { if (PrintCMSStatistics != 0) { gclog_or_tty->print_cr("Marking stack overflow (benign) " - "(pmc_pc="SIZE_FORMAT", pmc_rm="SIZE_FORMAT", kac="SIZE_FORMAT")", + "(pmc_pc="SIZE_FORMAT", pmc_rm="SIZE_FORMAT", kac="SIZE_FORMAT + ", kac_preclean="SIZE_FORMAT")", _ser_pmc_preclean_ovflw, _ser_pmc_remark_ovflw, - _ser_kac_ovflw); + _ser_kac_ovflw, _ser_kac_preclean_ovflw); } _markStack.expand(); _ser_pmc_remark_ovflw = 0; _ser_pmc_preclean_ovflw = 0; + _ser_kac_preclean_ovflw = 0; _ser_kac_ovflw = 0; } if (_par_pmc_remark_ovflw > 0 || _par_kac_ovflw > 0) { @@ -5693,10 +5696,10 @@ ReferenceProcessor* rp = ref_processor(); assert(rp->span().equals(_span), "Spans should be equal"); CMSKeepAliveClosure cmsKeepAliveClosure(this, _span, &_markBitMap, - &_markStack); + &_markStack, false /* !preclean */); CMSDrainMarkingStackClosure cmsDrainMarkingStackClosure(this, _span, &_markBitMap, &_markStack, - &cmsKeepAliveClosure); + &cmsKeepAliveClosure, false /* !preclean */); { TraceTime t("weak refs processing", PrintGCDetails, false, gclog_or_tty); if (rp->processing_is_mt()) { @@ -8302,8 +8305,29 @@ } ) if (simulate_overflow || !_mark_stack->push(obj)) { - _collector->push_on_overflow_list(obj); - _collector->_ser_kac_ovflw++; + if (_concurrent_precleaning) { + // We dirty the overflown object and let the remark + // phase deal with it. + assert(_collector->overflow_list_is_empty(), "Error"); + // In the case of object arrays, we need to dirty all of + // the cards that the object spans. No locking or atomics + // are needed since no one else can be mutating the mod union + // table. + if (obj->is_objArray()) { + size_t sz = obj->size(); + HeapWord* end_card_addr = + (HeapWord*)round_to((intptr_t)(addr+sz), CardTableModRefBS::card_size); + MemRegion redirty_range = MemRegion(addr, end_card_addr); + assert(!redirty_range.is_empty(), "Arithmetical tautology"); + _collector->_modUnionTable.mark_range(redirty_range); + } else { + _collector->_modUnionTable.mark(addr); + } + _collector->_ser_kac_preclean_ovflw++; + } else { + _collector->push_on_overflow_list(obj); + _collector->_ser_kac_ovflw++; + } } } } @@ -8400,6 +8424,8 @@ void CMSDrainMarkingStackClosure::do_void() { // the max number to take from overflow list at a time const size_t num = _mark_stack->capacity()/4; + assert(!_concurrent_precleaning || _collector->overflow_list_is_empty(), + "Overflow list should be NULL during concurrent phases"); while (!_mark_stack->isEmpty() || // if stack is empty, check the overflow list _collector->take_from_overflow_list(num, _mark_stack)) { diff -r b5e603f2e024 -r 00b023ae2d78 src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Wed Nov 19 14:20:51 2008 -0800 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Thu Nov 20 12:27:41 2008 -0800 @@ -592,6 +592,7 @@ size_t _ser_pmc_preclean_ovflw; size_t _ser_pmc_remark_ovflw; size_t _par_pmc_remark_ovflw; + size_t _ser_kac_preclean_ovflw; size_t _ser_kac_ovflw; size_t _par_kac_ovflw; NOT_PRODUCT(size_t _num_par_pushes;) @@ -1749,21 +1750,30 @@ // work-routine/closure used to complete transitive // marking of objects as live after a certain point // in which an initial set has been completely accumulated. +// This closure is currently used both during the final +// remark stop-world phase, as well as during the concurrent +// precleaning of the discovered reference lists. class CMSDrainMarkingStackClosure: public VoidClosure { CMSCollector* _collector; MemRegion _span; CMSMarkStack* _mark_stack; CMSBitMap* _bit_map; CMSKeepAliveClosure* _keep_alive; + bool _concurrent_precleaning; public: CMSDrainMarkingStackClosure(CMSCollector* collector, MemRegion span, CMSBitMap* bit_map, CMSMarkStack* mark_stack, - CMSKeepAliveClosure* keep_alive): + CMSKeepAliveClosure* keep_alive, + bool cpc): _collector(collector), _span(span), _bit_map(bit_map), _mark_stack(mark_stack), - _keep_alive(keep_alive) { } + _keep_alive(keep_alive), + _concurrent_precleaning(cpc) { + assert(_concurrent_precleaning == _keep_alive->concurrent_precleaning(), + "Mismatch"); + } void do_void(); }; diff -r b5e603f2e024 -r 00b023ae2d78 src/share/vm/memory/referenceProcessor.cpp --- a/src/share/vm/memory/referenceProcessor.cpp Wed Nov 19 14:20:51 2008 -0800 +++ b/src/share/vm/memory/referenceProcessor.cpp Thu Nov 20 12:27:41 2008 -0800 @@ -47,7 +47,9 @@ } bool empty() const { return head() == ReferenceProcessor::sentinel_ref(); } size_t length() { return _len; } - void set_length(size_t len) { _len = len; } + void set_length(size_t len) { _len = len; } + void inc_length(size_t inc) { _len += inc; assert(_len > 0, "Error"); } + void dec_length(size_t dec) { _len -= dec; } private: // Set value depending on UseCompressedOops. This could be a template class // but then we have to fix all the instantiations and declarations that use this class. @@ -436,13 +438,13 @@ // The "allow_null_referent" argument tells us to allow for the possibility // of a NULL referent in the discovered Reference object. This typically // happens in the case of concurrent collectors that may have done the - // discovery concurrently or interleaved with mutator execution. + // discovery concurrently, or interleaved, with mutator execution. inline void load_ptrs(DEBUG_ONLY(bool allow_null_referent)); // Move to the next discovered reference. inline void next(); - // Remove the current reference from the list and move to the next. + // Remove the current reference from the list inline void remove(); // Make the Reference object active again. @@ -476,7 +478,6 @@ inline size_t removed() const { return _removed; } ) -private: inline void move_to_next(); private: @@ -553,7 +554,7 @@ oopDesc::store_heap_oop((oop*)_prev_next, _next); } NOT_PRODUCT(_removed++); - move_to_next(); + _refs_list.dec_length(1); } inline void DiscoveredListIterator::move_to_next() { @@ -591,12 +592,13 @@ gclog_or_tty->print_cr("Dropping reference (" INTPTR_FORMAT ": %s" ") by policy", iter.obj(), iter.obj()->blueprint()->internal_name()); } + // Remove Reference object from list + iter.remove(); // Make the Reference object active again iter.make_active(); // keep the referent around iter.make_referent_alive(); - // Remove Reference object from list - iter.remove(); + iter.move_to_next(); } else { iter.next(); } @@ -629,12 +631,13 @@ iter.obj(), iter.obj()->blueprint()->internal_name()); } // The referent is reachable after all. + // Remove Reference object from list. + iter.remove(); // Update the referent pointer as necessary: Note that this // should not entail any recursive marking because the // referent must already have been traversed. iter.make_referent_alive(); - // Remove Reference object from list - iter.remove(); + iter.move_to_next(); } else { iter.next(); } @@ -670,6 +673,7 @@ } else { keep_alive->do_oop((oop*)next_addr); } + iter.move_to_next(); } else { iter.next(); } @@ -832,9 +836,9 @@ } java_lang_ref_Reference::set_discovered(move_tail, ref_lists[to_idx].head()); ref_lists[to_idx].set_head(move_head); - ref_lists[to_idx].set_length(ref_lists[to_idx].length() + refs_to_move); + ref_lists[to_idx].inc_length(refs_to_move); ref_lists[from_idx].set_head(new_head); - ref_lists[from_idx].set_length(ref_lists[from_idx].length() - refs_to_move); + ref_lists[from_idx].dec_length(refs_to_move); } else { ++to_idx; } @@ -923,7 +927,6 @@ void ReferenceProcessor::clean_up_discovered_reflist(DiscoveredList& refs_list) { assert(!discovery_is_atomic(), "Else why call this method?"); DiscoveredListIterator iter(refs_list, NULL, NULL); - size_t length = refs_list.length(); while (iter.has_next()) { iter.load_ptrs(DEBUG_ONLY(true /* allow_null_referent */)); oop next = java_lang_ref_Reference::next(iter.obj()); @@ -941,12 +944,11 @@ ) // Remove Reference object from list iter.remove(); - --length; + iter.move_to_next(); } else { iter.next(); } } - refs_list.set_length(length); NOT_PRODUCT( if (PrintGCDetails && TraceReferenceGC) { gclog_or_tty->print( @@ -1024,7 +1026,7 @@ // We have separate lists for enqueueing so no synchronization // is necessary. refs_list.set_head(obj); - refs_list.set_length(refs_list.length() + 1); + refs_list.inc_length(1); if (_discovered_list_needs_barrier) { _bs->write_ref_field((void*)discovered_addr, current_head); guarantee(false, "Needs to be fixed: YSR"); } @@ -1168,7 +1170,7 @@ _bs->write_ref_field((oop*)discovered_addr, current_head); } list->set_head(obj); - list->set_length(list->length() + 1); + list->inc_length(1); } // In the MT discovery case, it is currently possible to see @@ -1209,45 +1211,48 @@ TraceTime tt("Preclean SoftReferences", PrintGCDetails && PrintReferenceGC, false, gclog_or_tty); for (int i = 0; i < _num_q; i++) { + if (yield->should_return()) { + return; + } preclean_discovered_reflist(_discoveredSoftRefs[i], is_alive, keep_alive, complete_gc, yield); } } - if (yield->should_return()) { - return; - } // Weak references { TraceTime tt("Preclean WeakReferences", PrintGCDetails && PrintReferenceGC, false, gclog_or_tty); for (int i = 0; i < _num_q; i++) { + if (yield->should_return()) { + return; + } preclean_discovered_reflist(_discoveredWeakRefs[i], is_alive, keep_alive, complete_gc, yield); } } - if (yield->should_return()) { - return; - } // Final references { TraceTime tt("Preclean FinalReferences", PrintGCDetails && PrintReferenceGC, false, gclog_or_tty); for (int i = 0; i < _num_q; i++) { + if (yield->should_return()) { + return; + } preclean_discovered_reflist(_discoveredFinalRefs[i], is_alive, keep_alive, complete_gc, yield); } } - if (yield->should_return()) { - return; - } // Phantom references { TraceTime tt("Preclean PhantomReferences", PrintGCDetails && PrintReferenceGC, false, gclog_or_tty); for (int i = 0; i < _num_q; i++) { + if (yield->should_return()) { + return; + } preclean_discovered_reflist(_discoveredPhantomRefs[i], is_alive, keep_alive, complete_gc, yield); } @@ -1256,9 +1261,12 @@ // Walk the given discovered ref list, and remove all reference objects // whose referents are still alive, whose referents are NULL or which -// are not active (have a non-NULL next field). NOTE: For this to work -// correctly, refs discovery can not be happening concurrently with this -// step. +// are not active (have a non-NULL next field). NOTE: When we are +// thus precleaning the ref lists (which happens single-threaded today), +// we do not disable refs discovery to honour the correct semantics of +// java.lang.Reference. As a result, we need to be careful below +// that ref removal steps interleave safely with ref discovery steps +// (in this thread). void ReferenceProcessor::preclean_discovered_reflist(DiscoveredList& refs_list, BoolObjectClosure* is_alive, @@ -1266,7 +1274,6 @@ VoidClosure* complete_gc, YieldClosure* yield) { DiscoveredListIterator iter(refs_list, keep_alive, is_alive); - size_t length = refs_list.length(); while (iter.has_next()) { iter.load_ptrs(DEBUG_ONLY(true /* allow_null_referent */)); oop obj = iter.obj(); @@ -1281,7 +1288,6 @@ } // Remove Reference object from list iter.remove(); - --length; // Keep alive its cohort. iter.make_referent_alive(); if (UseCompressedOops) { @@ -1291,12 +1297,11 @@ oop* next_addr = (oop*)java_lang_ref_Reference::next_addr(obj); keep_alive->do_oop(next_addr); } + iter.move_to_next(); } else { iter.next(); } } - refs_list.set_length(length); - // Close the reachable set complete_gc->do_void(); diff -r b5e603f2e024 -r 00b023ae2d78 src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Wed Nov 19 14:20:51 2008 -0800 +++ b/src/share/vm/runtime/globals.hpp Thu Nov 20 12:27:41 2008 -0800 @@ -1474,7 +1474,7 @@ "CMSPrecleanNumerator:CMSPrecleanDenominator yields convergence" \ " ratio") \ \ - product(bool, CMSPrecleanRefLists1, false, \ + product(bool, CMSPrecleanRefLists1, true, \ "Preclean ref lists during (initial) preclean phase") \ \ product(bool, CMSPrecleanRefLists2, false, \