# HG changeset patch # User jcoomes # Date 1281402230 25200 # Node ID 94251661de7611bf9fab3dbce76cd726193bc45b # Parent 5f429ee79634c326a1b8acb3d4d72be9f90a9395 6970376: ParNew: shared TaskQueue statistics Reviewed-by: ysr diff -r 5f429ee79634 -r 94251661de76 src/share/vm/gc_implementation/parNew/parNewGeneration.cpp --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Mon Aug 09 05:41:05 2010 -0700 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Mon Aug 09 18:03:50 2010 -0700 @@ -51,9 +51,14 @@ _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this), _keep_alive_closure(&_scan_weak_ref_closure), _promotion_failure_size(0), - _pushes(0), _pops(0), _steals(0), _steal_attempts(0), _term_attempts(0), _strong_roots_time(0.0), _term_time(0.0) { + #if TASKQUEUE_STATS + _term_attempts = 0; + _overflow_refills = 0; + _overflow_refill_objs = 0; + #endif // TASKQUEUE_STATS + _survivor_chunk_array = (ChunkArray*) old_gen()->get_data_recorder(thread_num()); _hash_seed = 17; // Might want to take time-based random value. @@ -100,7 +105,6 @@ // Push remainder. bool ok = work_queue()->push(old); assert(ok, "just popped, push must be okay"); - note_push(); } else { // Restore length so that it can be used if there // is a promotion failure and forwarding pointers @@ -126,7 +130,6 @@ while (queue->size() > (juint)max_size) { oop obj_to_scan; if (queue->pop_local(obj_to_scan)) { - note_pop(); if ((HeapWord *)obj_to_scan < young_old_boundary()) { if (obj_to_scan->is_objArray() && obj_to_scan->is_forwarded() && @@ -271,20 +274,28 @@ GrowableArray** overflow_stacks_, size_t desired_plab_sz, ParallelTaskTerminator& term); + + ~ParScanThreadStateSet() { TASKQUEUE_STATS_ONLY(reset_stats()); } + inline ParScanThreadState& thread_state(int i); - int pushes() { return _pushes; } - int pops() { return _pops; } - int steals() { return _steals; } + void reset(bool promotion_failed); void flush(); + + #if TASKQUEUE_STATS + static void + print_termination_stats_hdr(outputStream* const st = gclog_or_tty); + void print_termination_stats(outputStream* const st = gclog_or_tty); + static void + print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty); + void print_taskqueue_stats(outputStream* const st = gclog_or_tty); + void reset_stats(); + #endif // TASKQUEUE_STATS + private: ParallelTaskTerminator& _term; ParNewGeneration& _gen; Generation& _next_gen; - // staticstics - int _pushes; - int _pops; - int _steals; }; @@ -294,8 +305,7 @@ GrowableArray** overflow_stack_set_, size_t desired_plab_sz, ParallelTaskTerminator& term) : ResourceArray(sizeof(ParScanThreadState), num_threads), - _gen(gen), _next_gen(old_gen), _term(term), - _pushes(0), _pops(0), _steals(0) + _gen(gen), _next_gen(old_gen), _term(term) { assert(num_threads > 0, "sanity check!"); // Initialize states. @@ -323,6 +333,82 @@ } } +#if TASKQUEUE_STATS +void +ParScanThreadState::reset_stats() +{ + taskqueue_stats().reset(); + _term_attempts = 0; + _overflow_refills = 0; + _overflow_refill_objs = 0; +} + +void ParScanThreadStateSet::reset_stats() +{ + for (int i = 0; i < length(); ++i) { + thread_state(i).reset_stats(); + } +} + +void +ParScanThreadStateSet::print_termination_stats_hdr(outputStream* const st) +{ + st->print_raw_cr("GC Termination Stats"); + st->print_raw_cr(" elapsed --strong roots-- " + "-------termination-------"); + st->print_raw_cr("thr ms ms % " + " ms % attempts"); + st->print_raw_cr("--- --------- --------- ------ " + "--------- ------ --------"); +} + +void ParScanThreadStateSet::print_termination_stats(outputStream* const st) +{ + print_termination_stats_hdr(st); + + for (int i = 0; i < length(); ++i) { + const ParScanThreadState & pss = thread_state(i); + const double elapsed_ms = pss.elapsed_time() * 1000.0; + const double s_roots_ms = pss.strong_roots_time() * 1000.0; + const double term_ms = pss.term_time() * 1000.0; + st->print_cr("%3d %9.2f %9.2f %6.2f " + "%9.2f %6.2f " SIZE_FORMAT_W(8), + i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms, + term_ms, term_ms * 100 / elapsed_ms, pss.term_attempts()); + } +} + +// Print stats related to work queue activity. +void ParScanThreadStateSet::print_taskqueue_stats_hdr(outputStream* const st) +{ + st->print_raw_cr("GC Task Stats"); + st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr(); + st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr(); +} + +void ParScanThreadStateSet::print_taskqueue_stats(outputStream* const st) +{ + print_taskqueue_stats_hdr(st); + + TaskQueueStats totals; + for (int i = 0; i < length(); ++i) { + const ParScanThreadState & pss = thread_state(i); + const TaskQueueStats & stats = pss.taskqueue_stats(); + st->print("%3d ", i); stats.print(st); st->cr(); + totals += stats; + + if (pss.overflow_refills() > 0) { + st->print_cr(" " SIZE_FORMAT_W(10) " overflow refills " + SIZE_FORMAT_W(10) " overflow objects", + pss.overflow_refills(), pss.overflow_refill_objs()); + } + } + st->print("tot "); totals.print(st); st->cr(); + + DEBUG_ONLY(totals.verify()); +} +#endif // TASKQUEUE_STATS + void ParScanThreadStateSet::flush() { // Work in this loop should be kept as lightweight as @@ -346,42 +432,8 @@ // Inform old gen that we're done. _next_gen.par_promote_alloc_done(i); _next_gen.par_oop_since_save_marks_iterate_done(i); + } - // Flush stats related to work queue activity (push/pop/steal) - // This could conceivably become a bottleneck; if so, we'll put the - // stat's gathering under the flag. - if (PAR_STATS_ENABLED) { - _pushes += par_scan_state.pushes(); - _pops += par_scan_state.pops(); - _steals += par_scan_state.steals(); - if (ParallelGCVerbose) { - gclog_or_tty->print("Thread %d complete:\n" - " Pushes: %7d Pops: %7d Steals %7d (in %d attempts)\n", - i, par_scan_state.pushes(), par_scan_state.pops(), - par_scan_state.steals(), par_scan_state.steal_attempts()); - if (par_scan_state.overflow_pushes() > 0 || - par_scan_state.overflow_refills() > 0) { - gclog_or_tty->print(" Overflow pushes: %7d " - "Overflow refills: %7d for %d objs.\n", - par_scan_state.overflow_pushes(), - par_scan_state.overflow_refills(), - par_scan_state.overflow_refill_objs()); - } - - double elapsed = par_scan_state.elapsed(); - double strong_roots = par_scan_state.strong_roots_time(); - double term = par_scan_state.term_time(); - gclog_or_tty->print( - " Elapsed: %7.2f ms.\n" - " Strong roots: %7.2f ms (%6.2f%%)\n" - " Termination: %7.2f ms (%6.2f%%) (in %d entries)\n", - elapsed * 1000.0, - strong_roots * 1000.0, (strong_roots*100.0/elapsed), - term * 1000.0, (term*100.0/elapsed), - par_scan_state.term_attempts()); - } - } - } if (UseConcMarkSweepGC && ParallelGCThreads > 0) { // We need to call this even when ResizeOldPLAB is disabled // so as to avoid breaking some asserts. While we may be able @@ -456,15 +508,12 @@ // We have no local work, attempt to steal from other threads. // attempt to steal work from promoted. - par_scan_state()->note_steal_attempt(); if (task_queues()->steal(par_scan_state()->thread_num(), par_scan_state()->hash_seed(), obj_to_scan)) { - par_scan_state()->note_steal(); bool res = work_q->push(obj_to_scan); assert(res, "Empty queue should have room for a push."); - par_scan_state()->note_push(); // if successful, goto Start. continue; @@ -842,17 +891,6 @@ } thread_state_set.reset(promotion_failed()); - if (PAR_STATS_ENABLED && ParallelGCVerbose) { - gclog_or_tty->print("Thread totals:\n" - " Pushes: %7d Pops: %7d Steals %7d (sum = %7d).\n", - thread_state_set.pushes(), thread_state_set.pops(), - thread_state_set.steals(), - thread_state_set.pops()+thread_state_set.steals()); - } - assert(thread_state_set.pushes() == thread_state_set.pops() - + thread_state_set.steals(), - "Or else the queues are leaky."); - // Process (weak) reference objects found during scavenge. ReferenceProcessor* rp = ref_processor(); IsAliveClosure is_alive(this); @@ -932,6 +970,9 @@ gch->print_heap_change(gch_prev_used); } + TASKQUEUE_STATS_ONLY(thread_state_set.print_termination_stats()); + TASKQUEUE_STATS_ONLY(thread_state_set.print_taskqueue_stats()); + if (UseAdaptiveSizePolicy) { size_policy->minor_collection_end(gch->gc_cause()); size_policy->avg_survived()->sample(from()->used()); @@ -1104,9 +1145,8 @@ gclog_or_tty->print("queue overflow!\n"); } push_on_overflow_list(old, par_scan_state); - par_scan_state->note_overflow_push(); + TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0)); } - par_scan_state->note_push(); return new_obj; } @@ -1227,9 +1267,8 @@ if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) { // Add stats for overflow pushes. push_on_overflow_list(old, par_scan_state); - par_scan_state->note_overflow_push(); + TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0)); } - par_scan_state->note_push(); return new_obj; } @@ -1466,7 +1505,7 @@ cur = next; n++; } - par_scan_state->note_overflow_refill(n); + TASKQUEUE_STATS_ONLY(par_scan_state->note_overflow_refill(n)); #ifndef PRODUCT assert(_num_par_pushes >= n, "Too many pops?"); Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes); diff -r 5f429ee79634 -r 94251661de76 src/share/vm/gc_implementation/parNew/parNewGeneration.hpp --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Mon Aug 09 05:41:05 2010 -0700 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Mon Aug 09 18:03:50 2010 -0700 @@ -36,9 +36,6 @@ typedef Padded ObjToScanQueue; typedef GenericTaskQueueSet ObjToScanQueueSet; -// Enable this to get push/pop/steal stats. -const int PAR_STATS_ENABLED = 0; - class ParKeepAliveClosure: public DefNewGeneration::KeepAliveClosure { private: ParScanWeakRefClosure* _par_cl; @@ -94,8 +91,11 @@ bool _to_space_full; - int _pushes, _pops, _steals, _steal_attempts, _term_attempts; - int _overflow_pushes, _overflow_refills, _overflow_refill_objs; +#if TASKQUEUE_STATS + size_t _term_attempts; + size_t _overflow_refills; + size_t _overflow_refill_objs; +#endif // TASKQUEUE_STATS // Stats for promotion failure size_t _promotion_failure_size; @@ -181,45 +181,38 @@ } void print_and_clear_promotion_failure_size(); - int pushes() { return _pushes; } - int pops() { return _pops; } - int steals() { return _steals; } - int steal_attempts() { return _steal_attempts; } - int term_attempts() { return _term_attempts; } - int overflow_pushes() { return _overflow_pushes; } - int overflow_refills() { return _overflow_refills; } - int overflow_refill_objs() { return _overflow_refill_objs; } +#if TASKQUEUE_STATS + TaskQueueStats & taskqueue_stats() const { return _work_queue->stats; } + + size_t term_attempts() const { return _term_attempts; } + size_t overflow_refills() const { return _overflow_refills; } + size_t overflow_refill_objs() const { return _overflow_refill_objs; } - void note_push() { if (PAR_STATS_ENABLED) _pushes++; } - void note_pop() { if (PAR_STATS_ENABLED) _pops++; } - void note_steal() { if (PAR_STATS_ENABLED) _steals++; } - void note_steal_attempt() { if (PAR_STATS_ENABLED) _steal_attempts++; } - void note_term_attempt() { if (PAR_STATS_ENABLED) _term_attempts++; } - void note_overflow_push() { if (PAR_STATS_ENABLED) _overflow_pushes++; } - void note_overflow_refill(int objs) { - if (PAR_STATS_ENABLED) { - _overflow_refills++; - _overflow_refill_objs += objs; - } + void note_term_attempt() { ++_term_attempts; } + void note_overflow_refill(size_t objs) { + ++_overflow_refills; _overflow_refill_objs += objs; } + void reset_stats(); +#endif // TASKQUEUE_STATS + void start_strong_roots() { _start_strong_roots = os::elapsedTime(); } void end_strong_roots() { _strong_roots_time += (os::elapsedTime() - _start_strong_roots); } - double strong_roots_time() { return _strong_roots_time; } + double strong_roots_time() const { return _strong_roots_time; } void start_term_time() { - note_term_attempt(); + TASKQUEUE_STATS_ONLY(note_term_attempt()); _start_term = os::elapsedTime(); } void end_term_time() { _term_time += (os::elapsedTime() - _start_term); } - double term_time() { return _term_time; } + double term_time() const { return _term_time; } - double elapsed() { + double elapsed_time() const { return os::elapsedTime() - _start; } };