# HG changeset patch # User pliden # Date 1400067164 -7200 # Node ID 487f09bf44e0b39e4c987415ee7721853297d0c3 # Parent e4d318eea75a38bc9c3acd79df1f1419abdcabc3 8040803: G1: Concurrent mark hangs when mark stack overflows Reviewed-by: brutisso, ehelin diff -r e4d318eea75a -r 487f09bf44e0 src/share/vm/gc_implementation/g1/concurrentMark.cpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Thu Apr 17 18:47:15 2014 +0200 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Wed May 14 13:32:44 2014 +0200 @@ -978,7 +978,9 @@ if (concurrent()) { ConcurrentGCThread::stsLeave(); } - _first_overflow_barrier_sync.enter(); + + bool barrier_aborted = !_first_overflow_barrier_sync.enter(); + if (concurrent()) { ConcurrentGCThread::stsJoin(); } @@ -986,7 +988,17 @@ // more work if (verbose_low()) { - gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id); + if (barrier_aborted) { + gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id); + } else { + gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id); + } + } + + if (barrier_aborted) { + // If the barrier aborted we ignore the overflow condition and + // just abort the whole marking phase as quickly as possible. + return; } // If we're executing the concurrent phase of marking, reset the marking @@ -1026,14 +1038,20 @@ if (concurrent()) { ConcurrentGCThread::stsLeave(); } - _second_overflow_barrier_sync.enter(); + + bool barrier_aborted = !_second_overflow_barrier_sync.enter(); + if (concurrent()) { ConcurrentGCThread::stsJoin(); } // at this point everything should be re-initialized and ready to go if (verbose_low()) { - gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id); + if (barrier_aborted) { + gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id); + } else { + gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id); + } } } @@ -3232,6 +3250,8 @@ for (uint i = 0; i < _max_worker_id; ++i) { _tasks[i]->clear_region_fields(); } + _first_overflow_barrier_sync.abort(); + _second_overflow_barrier_sync.abort(); _has_aborted = true; SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); diff -r e4d318eea75a -r 487f09bf44e0 src/share/vm/utilities/workgroup.cpp --- a/src/share/vm/utilities/workgroup.cpp Thu Apr 17 18:47:15 2014 +0200 +++ b/src/share/vm/utilities/workgroup.cpp Wed May 14 13:32:44 2014 +0200 @@ -378,21 +378,22 @@ WorkGangBarrierSync::WorkGangBarrierSync() : _monitor(Mutex::safepoint, "work gang barrier sync", true), - _n_workers(0), _n_completed(0), _should_reset(false) { + _n_workers(0), _n_completed(0), _should_reset(false), _aborted(false) { } WorkGangBarrierSync::WorkGangBarrierSync(uint n_workers, const char* name) : _monitor(Mutex::safepoint, name, true), - _n_workers(n_workers), _n_completed(0), _should_reset(false) { + _n_workers(n_workers), _n_completed(0), _should_reset(false), _aborted(false) { } void WorkGangBarrierSync::set_n_workers(uint n_workers) { - _n_workers = n_workers; - _n_completed = 0; + _n_workers = n_workers; + _n_completed = 0; _should_reset = false; + _aborted = false; } -void WorkGangBarrierSync::enter() { +bool WorkGangBarrierSync::enter() { MutexLockerEx x(monitor(), Mutex::_no_safepoint_check_flag); if (should_reset()) { // The should_reset() was set and we are the first worker to enter @@ -415,10 +416,17 @@ set_should_reset(true); monitor()->notify_all(); } else { - while (n_completed() != n_workers()) { + while (n_completed() != n_workers() && !aborted()) { monitor()->wait(/* no_safepoint_check */ true); } } + return !aborted(); +} + +void WorkGangBarrierSync::abort() { + MutexLockerEx x(monitor(), Mutex::_no_safepoint_check_flag); + set_aborted(); + monitor()->notify_all(); } // SubTasksDone functions. diff -r e4d318eea75a -r 487f09bf44e0 src/share/vm/utilities/workgroup.hpp --- a/src/share/vm/utilities/workgroup.hpp Thu Apr 17 18:47:15 2014 +0200 +++ b/src/share/vm/utilities/workgroup.hpp Wed May 14 13:32:44 2014 +0200 @@ -359,18 +359,20 @@ class WorkGangBarrierSync : public StackObj { protected: Monitor _monitor; - uint _n_workers; - uint _n_completed; + uint _n_workers; + uint _n_completed; bool _should_reset; + bool _aborted; Monitor* monitor() { return &_monitor; } uint n_workers() { return _n_workers; } uint n_completed() { return _n_completed; } bool should_reset() { return _should_reset; } + bool aborted() { return _aborted; } void zero_completed() { _n_completed = 0; } void inc_completed() { _n_completed++; } - + void set_aborted() { _aborted = true; } void set_should_reset(bool v) { _should_reset = v; } public: @@ -383,8 +385,14 @@ // Enter the barrier. A worker that enters the barrier will // not be allowed to leave until all other threads have - // also entered the barrier. - void enter(); + // also entered the barrier or the barrier is aborted. + // Returns false if the barrier was aborted. + bool enter(); + + // Aborts the barrier and wakes up any threads waiting for + // the barrier to complete. The barrier will remain in the + // aborted state until the next call to set_n_workers(). + void abort(); }; // A class to manage claiming of subtasks within a group of tasks. The