# HG changeset patch # User jcoomes # Date 1281357665 25200 # Node ID 5f429ee79634c326a1b8acb3d4d72be9f90a9395 # Parent a03ae377b2e8a3938584f76cc854c3e81a941531 6966222: G1: simplify TaskQueue overflow handling Reviewed-by: tonyp, ysr diff -r a03ae377b2e8 -r 5f429ee79634 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Aug 06 10:17:21 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Mon Aug 09 05:41:05 2010 -0700 @@ -2710,6 +2710,35 @@ } }; +#if TASKQUEUE_STATS +void G1CollectedHeap::print_taskqueue_stats_hdr(outputStream* const st) { + st->print_raw_cr("GC Task Stats"); + st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr(); + st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr(); +} + +void G1CollectedHeap::print_taskqueue_stats(outputStream* const st) const { + print_taskqueue_stats_hdr(st); + + TaskQueueStats totals; + const int n = MAX2(workers()->total_workers(), 1); + for (int i = 0; i < n; ++i) { + st->print("%3d ", i); task_queue(i)->stats.print(st); st->cr(); + totals += task_queue(i)->stats; + } + st->print_raw("tot "); totals.print(st); st->cr(); + + DEBUG_ONLY(totals.verify()); +} + +void G1CollectedHeap::reset_taskqueue_stats() { + const int n = MAX2(workers()->total_workers(), 1); + for (int i = 0; i < n; ++i) { + task_queue(i)->stats.reset(); + } +} +#endif // TASKQUEUE_STATS + void G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { if (GC_locker::check_active_before_gc()) { @@ -2970,6 +2999,9 @@ } } + TASKQUEUE_STATS_ONLY(if (ParallelGCVerbose) print_taskqueue_stats()); + TASKQUEUE_STATS_ONLY(reset_taskqueue_stats()); + if (PrintHeapAtGC) { Universe::print_heap_after_gc(); } @@ -3715,10 +3747,6 @@ _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived)), _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)), _age_table(false), -#if G1_DETAILED_STATS - _pushes(0), _pops(0), _steals(0), - _steal_attempts(0), _overflow_pushes(0), -#endif _strong_roots_time(0), _term_time(0), _alloc_buffer_waste(0), _undo_waste(0) { @@ -3738,14 +3766,41 @@ _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM; memset(_surviving_young_words, 0, real_length * sizeof(size_t)); - _overflowed_refs = new OverflowQueue(10); - _alloc_buffers[GCAllocForSurvived] = &_surviving_alloc_buffer; _alloc_buffers[GCAllocForTenured] = &_tenured_alloc_buffer; _start = os::elapsedTime(); } +void +G1ParScanThreadState::print_termination_stats_hdr(outputStream* const st) +{ + st->print_raw_cr("GC Termination Stats"); + st->print_raw_cr(" elapsed --strong roots-- -------termination-------" + " ------waste (KiB)------"); + st->print_raw_cr("thr ms ms % ms % attempts" + " total alloc undo"); + st->print_raw_cr("--- --------- --------- ------ --------- ------ --------" + " ------- ------- -------"); +} + +void +G1ParScanThreadState::print_termination_stats(int i, + outputStream* const st) const +{ + const double elapsed_ms = elapsed_time() * 1000.0; + const double s_roots_ms = strong_roots_time() * 1000.0; + const double term_ms = term_time() * 1000.0; + st->print_cr("%3d %9.2f %9.2f %6.2f " + "%9.2f %6.2f " SIZE_FORMAT_W(8) " " + SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7), + i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms, + term_ms, term_ms * 100 / elapsed_ms, term_attempts(), + (alloc_buffer_waste() + undo_waste()) * HeapWordSize / K, + alloc_buffer_waste() * HeapWordSize / K, + undo_waste() * HeapWordSize / K); +} + G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()), _par_scan_state(par_scan_state) { } @@ -3952,12 +4007,9 @@ G1ParScanThreadState* pss = par_scan_state(); while (true) { pss->trim_queue(); - IF_G1_DETAILED_STATS(pss->note_steal_attempt()); StarTask stolen_task; if (queues()->steal(pss->queue_num(), pss->hash_seed(), stolen_task)) { - IF_G1_DETAILED_STATS(pss->note_steal()); - // slightly paranoid tests; I'm trying to catch potential // problems before we go into push_on_queue to know where the // problem is coming from @@ -4076,35 +4128,9 @@ // Clean up any par-expanded rem sets. HeapRegionRemSet::par_cleanup(); - MutexLocker x(stats_lock()); if (ParallelGCVerbose) { - gclog_or_tty->print("Thread %d complete:\n", i); -#if G1_DETAILED_STATS - gclog_or_tty->print(" Pushes: %7d Pops: %7d Overflows: %7d Steals %7d (in %d attempts)\n", - pss.pushes(), - pss.pops(), - pss.overflow_pushes(), - pss.steals(), - pss.steal_attempts()); -#endif - double elapsed = pss.elapsed(); - double strong_roots = pss.strong_roots_time(); - double term = pss.term_time(); - gclog_or_tty->print(" Elapsed: %7.2f ms.\n" - " Strong roots: %7.2f ms (%6.2f%%)\n" - " Termination: %7.2f ms (%6.2f%%) " - "(in "SIZE_FORMAT" entries)\n", - elapsed * 1000.0, - strong_roots * 1000.0, (strong_roots*100.0/elapsed), - term * 1000.0, (term*100.0/elapsed), - pss.term_attempts()); - size_t total_waste = pss.alloc_buffer_waste() + pss.undo_waste(); - gclog_or_tty->print(" Waste: %8dK\n" - " Alloc Buffer: %8dK\n" - " Undo: %8dK\n", - (total_waste * HeapWordSize) / K, - (pss.alloc_buffer_waste() * HeapWordSize) / K, - (pss.undo_waste() * HeapWordSize) / K); + MutexLocker x(stats_lock()); + pss.print_termination_stats(i); } assert(pss.refs_to_scan() == 0, "Task queue should be empty"); @@ -4221,6 +4247,7 @@ if (ParallelGCThreads > 0) { // The individual threads will set their evac-failure closures. StrongRootsScope srs(this); + if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr(); workers()->run_task(&g1_par_task); } else { StrongRootsScope srs(this); diff -r a03ae377b2e8 -r 5f429ee79634 src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Fri Aug 06 10:17:21 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Mon Aug 09 05:41:05 2010 -0700 @@ -46,17 +46,7 @@ class ConcurrentG1Refine; class ConcurrentZFThread; -// If want to accumulate detailed statistics on work queues -// turn this on. -#define G1_DETAILED_STATS 0 - -#if G1_DETAILED_STATS -# define IF_G1_DETAILED_STATS(code) code -#else -# define IF_G1_DETAILED_STATS(code) -#endif - -typedef GenericTaskQueue RefToScanQueue; +typedef OverflowTaskQueue RefToScanQueue; typedef GenericTaskQueueSet RefToScanQueueSet; typedef int RegionIdx_t; // needs to hold [ 0..max_regions() ) @@ -471,6 +461,12 @@ virtual void shrink(size_t expand_bytes); void shrink_helper(size_t expand_bytes); + #if TASKQUEUE_STATS + static void print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty); + void print_taskqueue_stats(outputStream* const st = gclog_or_tty) const; + void reset_taskqueue_stats(); + #endif // TASKQUEUE_STATS + // Do an incremental collection: identify a collection set, and evacuate // its live objects elsewhere. virtual void do_collection_pause(); @@ -662,7 +658,7 @@ public: void set_refine_cte_cl_concurrency(bool concurrent); - RefToScanQueue *task_queue(int i); + RefToScanQueue *task_queue(int i) const; // A set of cards where updates happened during the GC DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; } @@ -1579,9 +1575,6 @@ CardTableModRefBS* _ct_bs; G1RemSet* _g1_rem; - typedef GrowableArray OverflowQueue; - OverflowQueue* _overflowed_refs; - G1ParGCAllocBuffer _surviving_alloc_buffer; G1ParGCAllocBuffer _tenured_alloc_buffer; G1ParGCAllocBuffer* _alloc_buffers[GCAllocPurposeCount]; @@ -1598,10 +1591,6 @@ int _queue_num; size_t _term_attempts; -#if G1_DETAILED_STATS - int _pushes, _pops, _steals, _steal_attempts; - int _overflow_pushes; -#endif double _start; double _start_strong_roots; @@ -1615,7 +1604,7 @@ // this points into the array, as we use the first few entries for padding size_t* _surviving_young_words; -#define PADDING_ELEM_NUM (64 / sizeof(size_t)) +#define PADDING_ELEM_NUM (DEFAULT_CACHE_LINE_SIZE / sizeof(size_t)) void add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; } @@ -1650,15 +1639,14 @@ } RefToScanQueue* refs() { return _refs; } - OverflowQueue* overflowed_refs() { return _overflowed_refs; } ageTable* age_table() { return &_age_table; } G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) { return _alloc_buffers[purpose]; } - size_t alloc_buffer_waste() { return _alloc_buffer_waste; } - size_t undo_waste() { return _undo_waste; } + size_t alloc_buffer_waste() const { return _alloc_buffer_waste; } + size_t undo_waste() const { return _undo_waste; } template void push_on_queue(T* ref) { assert(ref != NULL, "invariant"); @@ -1671,12 +1659,7 @@ assert(_g1h->obj_in_cs(p), "Should be in CS"); } #endif - if (!refs()->push(ref)) { - overflowed_refs()->push(ref); - IF_G1_DETAILED_STATS(note_overflow_push()); - } else { - IF_G1_DETAILED_STATS(note_push()); - } + refs()->push(ref); } void pop_from_queue(StarTask& ref) { @@ -1687,7 +1670,6 @@ _g1h->is_in_g1_reserved(ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)ref) : oopDesc::load_decode_heap_oop((oop*)ref)), "invariant"); - IF_G1_DETAILED_STATS(note_pop()); } else { StarTask null_task; ref = null_task; @@ -1695,7 +1677,8 @@ } void pop_from_overflow_queue(StarTask& ref) { - StarTask new_ref = overflowed_refs()->pop(); + StarTask new_ref; + refs()->pop_overflow(new_ref); assert((oop*)new_ref != NULL, "pop() from a local non-empty stack"); assert(UseCompressedOops || !new_ref.is_narrow(), "Error"); assert(has_partial_array_mask((oop*)new_ref) || @@ -1705,8 +1688,8 @@ ref = new_ref; } - int refs_to_scan() { return refs()->size(); } - int overflowed_refs_to_scan() { return overflowed_refs()->length(); } + int refs_to_scan() { return refs()->size(); } + int overflowed_refs_to_scan() { return refs()->overflow_stack()->length(); } template void update_rs(HeapRegion* from, T* p, int tid) { if (G1DeferredRSUpdate) { @@ -1775,30 +1758,16 @@ int* hash_seed() { return &_hash_seed; } int queue_num() { return _queue_num; } - size_t term_attempts() { return _term_attempts; } + size_t term_attempts() const { return _term_attempts; } void note_term_attempt() { _term_attempts++; } -#if G1_DETAILED_STATS - int pushes() { return _pushes; } - int pops() { return _pops; } - int steals() { return _steals; } - int steal_attempts() { return _steal_attempts; } - int overflow_pushes() { return _overflow_pushes; } - - void note_push() { _pushes++; } - void note_pop() { _pops++; } - void note_steal() { _steals++; } - void note_steal_attempt() { _steal_attempts++; } - void note_overflow_push() { _overflow_pushes++; } -#endif - void start_strong_roots() { _start_strong_roots = os::elapsedTime(); } void end_strong_roots() { _strong_roots_time += (os::elapsedTime() - _start_strong_roots); } - double strong_roots_time() { return _strong_roots_time; } + double strong_roots_time() const { return _strong_roots_time; } void start_term_time() { note_term_attempt(); @@ -1807,12 +1776,17 @@ void end_term_time() { _term_time += (os::elapsedTime() - _start_term); } - double term_time() { return _term_time; } + double term_time() const { return _term_time; } - double elapsed() { + double elapsed_time() const { return os::elapsedTime() - _start; } + static void + print_termination_stats_hdr(outputStream* const st = gclog_or_tty); + void + print_termination_stats(int i, outputStream* const st = gclog_or_tty) const; + size_t* surviving_young_words() { // We add on to hide entry 0 which accumulates surviving words for // age -1 regions (i.e. non-young ones) diff -r a03ae377b2e8 -r 5f429ee79634 src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp Fri Aug 06 10:17:21 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp Mon Aug 09 05:41:05 2010 -0700 @@ -81,11 +81,10 @@ return attempt_allocation_slow(word_size, permit_collection_pause); } -inline RefToScanQueue* G1CollectedHeap::task_queue(int i) { +inline RefToScanQueue* G1CollectedHeap::task_queue(int i) const { return _task_queues->queue(i); } - inline bool G1CollectedHeap::isMarkedPrev(oop obj) const { return _cm->prevMarkBitMap()->isMarked((HeapWord *)obj); } diff -r a03ae377b2e8 -r 5f429ee79634 src/share/vm/utilities/taskqueue.cpp --- a/src/share/vm/utilities/taskqueue.cpp Fri Aug 06 10:17:21 2010 -0700 +++ b/src/share/vm/utilities/taskqueue.cpp Mon Aug 09 05:41:05 2010 -0700 @@ -36,6 +36,14 @@ "qpush", "qpop", "qpop-s", "qattempt", "qsteal", "opush", "omax" }; +TaskQueueStats & TaskQueueStats::operator +=(const TaskQueueStats & addend) +{ + for (unsigned int i = 0; i < last_stat_id; ++i) { + _stats[i] += addend._stats[i]; + } + return *this; +} + void TaskQueueStats::print_header(unsigned int line, outputStream* const stream, unsigned int width) { @@ -71,6 +79,29 @@ } #undef FMT } + +#ifdef ASSERT +// Invariants which should hold after a TaskQueue has been emptied and is +// quiescent; they do not hold at arbitrary times. +void TaskQueueStats::verify() const +{ + assert(get(push) == get(pop) + get(steal), + err_msg("push=" SIZE_FORMAT " pop=" SIZE_FORMAT " steal=" SIZE_FORMAT, + get(push), get(pop), get(steal))); + assert(get(pop_slow) <= get(pop), + err_msg("pop_slow=" SIZE_FORMAT " pop=" SIZE_FORMAT, + get(pop_slow), get(pop))); + assert(get(steal) <= get(steal_attempt), + err_msg("steal=" SIZE_FORMAT " steal_attempt=" SIZE_FORMAT, + get(steal), get(steal_attempt))); + assert(get(overflow) == 0 || get(push) != 0, + err_msg("overflow=" SIZE_FORMAT " push=" SIZE_FORMAT, + get(overflow), get(push))); + assert(get(overflow_max_len) == 0 || get(overflow) != 0, + err_msg("overflow_max_len=" SIZE_FORMAT " overflow=" SIZE_FORMAT, + get(overflow_max_len), get(overflow))); +} +#endif // ASSERT #endif // TASKQUEUE_STATS int TaskQueueSetSuper::randomParkAndMiller(int *seed0) { diff -r a03ae377b2e8 -r 5f429ee79634 src/share/vm/utilities/taskqueue.hpp --- a/src/share/vm/utilities/taskqueue.hpp Fri Aug 06 10:17:21 2010 -0700 +++ b/src/share/vm/utilities/taskqueue.hpp Mon Aug 09 05:41:05 2010 -0700 @@ -59,15 +59,21 @@ inline void record_steal(bool success); inline void record_overflow(size_t new_length); + TaskQueueStats & operator +=(const TaskQueueStats & addend); + inline size_t get(StatId id) const { return _stats[id]; } inline const size_t* get() const { return _stats; } inline void reset(); + // Print the specified line of the header (does not include a line separator). static void print_header(unsigned int line, outputStream* const stream = tty, unsigned int width = 10); + // Print the statistics (does not include a line separator). void print(outputStream* const stream = tty, unsigned int width = 10) const; + DEBUG_ONLY(void verify() const;) + private: size_t _stats[last_stat_id]; static const char * const _names[last_stat_id];