# HG changeset patch # User tonyp # Date 1304095249 14400 # Node ID cd8e33b2a8adb8860d425e1db417199852b47e55 # Parent da0fffdcc453008ea932a5b59e78f5ab6f668cec 7034139: G1: assert(Thread::current()->is_ConcurrentGC_thread()) failed: only a conc GC thread can call this. Summary: We were calling STS join and leave during a STW pause and we are not suppoesed to. I now only call those during concurrent phase. I also added stress code in the non-product builds to force an overflows (the condition that ws uncovering the bug) to make sure it does not happen again. Reviewed-by: johnc, brutisso diff -r da0fffdcc453 -r cd8e33b2a8ad src/share/vm/gc_implementation/g1/concurrentMark.cpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Thu Apr 28 15:29:18 2011 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Fri Apr 29 12:40:49 2011 -0400 @@ -826,6 +826,14 @@ void ConcurrentMark::checkpointRootsInitialPost() { G1CollectedHeap* g1h = G1CollectedHeap::heap(); + // If we force an overflow during remark, the remark operation will + // actually abort and we'll restart concurrent marking. If we always + // force an oveflow during remark we'll never actually complete the + // marking phase. So, we initilize this here, at the start of the + // cycle, so that at the remaining overflow number will decrease at + // every remark and we'll eventually not need to cause one. + force_overflow_stw()->init(); + // For each region note start of marking. NoteStartOfMarkHRClosure startcl; g1h->heap_region_iterate(&startcl); @@ -893,27 +901,37 @@ } /* - Notice that in the next two methods, we actually leave the STS - during the barrier sync and join it immediately afterwards. If we - do not do this, this then the following deadlock can occur: one - thread could be in the barrier sync code, waiting for the other - thread to also sync up, whereas another one could be trying to - yield, while also waiting for the other threads to sync up too. - - Because the thread that does the sync barrier has left the STS, it - is possible to be suspended for a Full GC or an evacuation pause - could occur. This is actually safe, since the entering the sync - barrier is one of the last things do_marking_step() does, and it - doesn't manipulate any data structures afterwards. -*/ + * Notice that in the next two methods, we actually leave the STS + * during the barrier sync and join it immediately afterwards. If we + * do not do this, the following deadlock can occur: one thread could + * be in the barrier sync code, waiting for the other thread to also + * sync up, whereas another one could be trying to yield, while also + * waiting for the other threads to sync up too. + * + * Note, however, that this code is also used during remark and in + * this case we should not attempt to leave / enter the STS, otherwise + * we'll either hit an asseert (debug / fastdebug) or deadlock + * (product). So we should only leave / enter the STS if we are + * operating concurrently. + * + * Because the thread that does the sync barrier has left the STS, it + * is possible to be suspended for a Full GC or an evacuation pause + * could occur. This is actually safe, since the entering the sync + * barrier is one of the last things do_marking_step() does, and it + * doesn't manipulate any data structures afterwards. + */ void ConcurrentMark::enter_first_sync_barrier(int task_num) { if (verbose_low()) gclog_or_tty->print_cr("[%d] entering first barrier", task_num); - ConcurrentGCThread::stsLeave(); + if (concurrent()) { + ConcurrentGCThread::stsLeave(); + } _first_overflow_barrier_sync.enter(); - ConcurrentGCThread::stsJoin(); + if (concurrent()) { + ConcurrentGCThread::stsJoin(); + } // at this point everyone should have synced up and not be doing any // more work @@ -923,7 +941,12 @@ // let task 0 do this if (task_num == 0) { // task 0 is responsible for clearing the global data structures - clear_marking_state(); + // We should be here because of an overflow. During STW we should + // not clear the overflow flag since we rely on it being true when + // we exit this method to abort the pause and restart concurent + // marking. + clear_marking_state(concurrent() /* clear_overflow */); + force_overflow()->update(); if (PrintGC) { gclog_or_tty->date_stamp(PrintGCDateStamps); @@ -940,15 +963,45 @@ if (verbose_low()) gclog_or_tty->print_cr("[%d] entering second barrier", task_num); - ConcurrentGCThread::stsLeave(); + if (concurrent()) { + ConcurrentGCThread::stsLeave(); + } _second_overflow_barrier_sync.enter(); - ConcurrentGCThread::stsJoin(); + if (concurrent()) { + ConcurrentGCThread::stsJoin(); + } // at this point everything should be re-initialised and ready to go if (verbose_low()) gclog_or_tty->print_cr("[%d] leaving second barrier", task_num); } +#ifndef PRODUCT +void ForceOverflowSettings::init() { + _num_remaining = G1ConcMarkForceOverflow; + _force = false; + update(); +} + +void ForceOverflowSettings::update() { + if (_num_remaining > 0) { + _num_remaining -= 1; + _force = true; + } else { + _force = false; + } +} + +bool ForceOverflowSettings::should_force() { + if (_force) { + _force = false; + return true; + } else { + return false; + } +} +#endif // !PRODUCT + void ConcurrentMark::grayRoot(oop p) { HeapWord* addr = (HeapWord*) p; // We can't really check against _heap_start and _heap_end, since it @@ -1117,6 +1170,7 @@ _restart_for_overflow = false; size_t active_workers = MAX2((size_t) 1, parallel_marking_threads()); + force_overflow_conc()->init(); set_phase(active_workers, true /* concurrent */); CMConcurrentMarkingTask markingTask(this, cmThread()); @@ -2703,12 +2757,16 @@ } -void ConcurrentMark::clear_marking_state() { +void ConcurrentMark::clear_marking_state(bool clear_overflow) { _markStack.setEmpty(); _markStack.clear_overflow(); _regionStack.setEmpty(); _regionStack.clear_overflow(); - clear_has_overflown(); + if (clear_overflow) { + clear_has_overflown(); + } else { + assert(has_overflown(), "pre-condition"); + } _finger = _heap_start; for (int i = 0; i < (int)_max_task_num; ++i) { @@ -4279,6 +4337,15 @@ } } + // If we are about to wrap up and go into termination, check if we + // should raise the overflow flag. + if (do_termination && !has_aborted()) { + if (_cm->force_overflow()->should_force()) { + _cm->set_has_overflown(); + regular_clock_call(); + } + } + // We still haven't aborted. Now, let's try to get into the // termination protocol. if (do_termination && !has_aborted()) { diff -r da0fffdcc453 -r cd8e33b2a8ad src/share/vm/gc_implementation/g1/concurrentMark.hpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp Thu Apr 28 15:29:18 2011 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp Fri Apr 29 12:40:49 2011 -0400 @@ -316,6 +316,19 @@ void setEmpty() { _index = 0; clear_overflow(); } }; +class ForceOverflowSettings VALUE_OBJ_CLASS_SPEC { +private: +#ifndef PRODUCT + uintx _num_remaining; + bool _force; +#endif // !defined(PRODUCT) + +public: + void init() PRODUCT_RETURN; + void update() PRODUCT_RETURN; + bool should_force() PRODUCT_RETURN_( return false; ); +}; + // this will enable a variety of different statistics per GC task #define _MARKING_STATS_ 0 // this will enable the higher verbose levels @@ -462,6 +475,9 @@ WorkGang* _parallel_workers; + ForceOverflowSettings _force_overflow_conc; + ForceOverflowSettings _force_overflow_stw; + void weakRefsWork(bool clear_all_soft_refs); void swapMarkBitMaps(); @@ -470,7 +486,7 @@ // task local ones; should be called during initial mark. void reset(); // It resets all the marking data structures. - void clear_marking_state(); + void clear_marking_state(bool clear_overflow = true); // It should be called to indicate which phase we're in (concurrent // mark or remark) and how many threads are currently active. @@ -547,6 +563,22 @@ void enter_first_sync_barrier(int task_num); void enter_second_sync_barrier(int task_num); + ForceOverflowSettings* force_overflow_conc() { + return &_force_overflow_conc; + } + + ForceOverflowSettings* force_overflow_stw() { + return &_force_overflow_stw; + } + + ForceOverflowSettings* force_overflow() { + if (concurrent()) { + return force_overflow_conc(); + } else { + return force_overflow_stw(); + } + } + public: // Manipulation of the global mark stack. // Notice that the first mark_stack_push is CAS-based, whereas the diff -r da0fffdcc453 -r cd8e33b2a8ad src/share/vm/gc_implementation/g1/g1_globals.hpp --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Thu Apr 28 15:29:18 2011 -0700 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Fri Apr 29 12:40:49 2011 -0400 @@ -311,7 +311,11 @@ \ develop(bool, G1ExitOnExpansionFailure, false, \ "Raise a fatal VM exit out of memory failure in the event " \ - " that heap expansion fails due to running out of swap.") + " that heap expansion fails due to running out of swap.") \ + \ + develop(uintx, G1ConcMarkForceOverflow, 0, \ + "The number of times we'll force an overflow during " \ + "concurrent marking") G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)