# HG changeset patch # User jcoomes # Date 1279341201 25200 # Node ID a93a9eda13f7b61fa6d638ea7c9a7b89f7bb75fb # Parent 1a1ce2076047740e842216147748094d1a5dd1a3 6962947: shared TaskQueue statistics Reviewed-by: tonyp, ysr diff -r 1a1ce2076047 -r a93a9eda13f7 src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Fri Jul 16 10:09:15 2010 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Fri Jul 16 21:33:21 2010 -0700 @@ -664,19 +664,14 @@ return; } - // XXX use a global constant instead of 64! - typedef struct OopTaskQueuePadded { - OopTaskQueue work_queue; - char pad[64 - sizeof(OopTaskQueue)]; // prevent false sharing - } OopTaskQueuePadded; - + typedef Padded PaddedOopTaskQueue; for (i = 0; i < num_queues; i++) { - OopTaskQueuePadded *q_padded = new OopTaskQueuePadded(); - if (q_padded == NULL) { + PaddedOopTaskQueue *q = new PaddedOopTaskQueue(); + if (q == NULL) { warning("work_queue allocation failure."); return; } - _task_queues->register_queue(i, &q_padded->work_queue); + _task_queues->register_queue(i, q); } for (i = 0; i < num_queues; i++) { _task_queues->queue(i)->initialize(); diff -r 1a1ce2076047 -r a93a9eda13f7 src/share/vm/gc_implementation/parNew/parNewGeneration.cpp --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Fri Jul 16 10:09:15 2010 -0700 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Fri Jul 16 21:33:21 2010 -0700 @@ -539,10 +539,9 @@ guarantee(_task_queues != NULL, "task_queues allocation failure."); for (uint i1 = 0; i1 < ParallelGCThreads; i1++) { - ObjToScanQueuePadded *q_padded = new ObjToScanQueuePadded(); - guarantee(q_padded != NULL, "work_queue Allocation failure."); - - _task_queues->register_queue(i1, &q_padded->work_queue); + ObjToScanQueue *q = new ObjToScanQueue(); + guarantee(q != NULL, "work_queue Allocation failure."); + _task_queues->register_queue(i1, q); } for (uint i2 = 0; i2 < ParallelGCThreads; i2++) diff -r 1a1ce2076047 -r a93a9eda13f7 src/share/vm/gc_implementation/parNew/parNewGeneration.hpp --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Fri Jul 16 10:09:15 2010 -0700 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Fri Jul 16 21:33:21 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,8 +33,8 @@ // but they must be here to allow ParScanClosure::do_oop_work to be defined // in genOopClosures.inline.hpp. -typedef OopTaskQueue ObjToScanQueue; -typedef OopTaskQueueSet ObjToScanQueueSet; +typedef Padded ObjToScanQueue; +typedef GenericTaskQueueSet ObjToScanQueueSet; // Enable this to get push/pop/steal stats. const int PAR_STATS_ENABLED = 0; @@ -304,12 +304,6 @@ friend class ParEvacuateFollowersClosure; private: - // XXX use a global constant instead of 64! - struct ObjToScanQueuePadded { - ObjToScanQueue work_queue; - char pad[64 - sizeof(ObjToScanQueue)]; // prevent false sharing - }; - // The per-worker-thread work queues ObjToScanQueueSet* _task_queues; diff -r 1a1ce2076047 -r a93a9eda13f7 src/share/vm/gc_implementation/parNew/parOopClosures.hpp --- a/src/share/vm/gc_implementation/parNew/parOopClosures.hpp Fri Jul 16 10:09:15 2010 -0700 +++ b/src/share/vm/gc_implementation/parNew/parOopClosures.hpp Fri Jul 16 21:33:21 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,7 +26,8 @@ class ParScanThreadState; class ParNewGeneration; -typedef OopTaskQueueSet ObjToScanQueueSet; +typedef Padded ObjToScanQueue; +typedef GenericTaskQueueSet ObjToScanQueueSet; class ParallelTaskTerminator; class ParScanClosure: public OopsInGenClosure { diff -r 1a1ce2076047 -r a93a9eda13f7 src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp Fri Jul 16 10:09:15 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp Fri Jul 16 21:33:21 2010 -0700 @@ -90,10 +90,7 @@ } void PSPromotionManager::post_scavenge() { -#if PS_PM_STATS - print_stats(); -#endif // PS_PM_STATS - + TASKQUEUE_STATS_ONLY(if (PrintGCDetails && ParallelGCVerbose) print_stats()); for (uint i = 0; i < ParallelGCThreads + 1; i++) { PSPromotionManager* manager = manager_array(i); if (UseDepthFirstScavengeOrder) { @@ -105,37 +102,58 @@ } } -#if PS_PM_STATS - +#if TASKQUEUE_STATS void -PSPromotionManager::print_stats(uint i) { - tty->print_cr("---- GC Worker %2d Stats", i); - tty->print_cr(" total pushes %8d", _total_pushes); - tty->print_cr(" masked pushes %8d", _masked_pushes); - tty->print_cr(" overflow pushes %8d", _overflow_pushes); - tty->print_cr(" max overflow length %8d", _max_overflow_length); - tty->print_cr(""); - tty->print_cr(" arrays chunked %8d", _arrays_chunked); - tty->print_cr(" array chunks processed %8d", _array_chunks_processed); - tty->print_cr(""); - tty->print_cr(" total steals %8d", _total_steals); - tty->print_cr(" masked steals %8d", _masked_steals); - tty->print_cr(""); +PSPromotionManager::print_taskqueue_stats(uint i) const { + const TaskQueueStats& stats = depth_first() ? + _claimed_stack_depth.stats : _claimed_stack_breadth.stats; + tty->print("%3u ", i); + stats.print(); + tty->cr(); } void +PSPromotionManager::print_local_stats(uint i) const { + #define FMT " " SIZE_FORMAT_W(10) + tty->print_cr("%3u" FMT FMT FMT FMT, i, _masked_pushes, _masked_steals, + _arrays_chunked, _array_chunks_processed); + #undef FMT +} + +static const char* const pm_stats_hdr[] = { + " --------masked------- arrays array", + "thr push steal chunked chunks", + "--- ---------- ---------- ---------- ----------" +}; + +void PSPromotionManager::print_stats() { - tty->print_cr("== GC Tasks Stats (%s), GC %3d", - (UseDepthFirstScavengeOrder) ? "Depth-First" : "Breadth-First", + const bool df = UseDepthFirstScavengeOrder; + tty->print_cr("== GC Task Stats (%s-First), GC %3d", df ? "Depth" : "Breadth", Universe::heap()->total_collections()); - for (uint i = 0; i < ParallelGCThreads+1; ++i) { - PSPromotionManager* manager = manager_array(i); - manager->print_stats(i); + tty->print("thr "); TaskQueueStats::print_header(1); tty->cr(); + tty->print("--- "); TaskQueueStats::print_header(2); tty->cr(); + for (uint i = 0; i < ParallelGCThreads + 1; ++i) { + manager_array(i)->print_taskqueue_stats(i); + } + + const uint hlines = sizeof(pm_stats_hdr) / sizeof(pm_stats_hdr[0]); + for (uint i = 0; i < hlines; ++i) tty->print_cr(pm_stats_hdr[i]); + for (uint i = 0; i < ParallelGCThreads + 1; ++i) { + manager_array(i)->print_local_stats(i); } } -#endif // PS_PM_STATS +void +PSPromotionManager::reset_stats() { + TaskQueueStats& stats = depth_first() ? + claimed_stack_depth()->stats : claimed_stack_breadth()->stats; + stats.reset(); + _masked_pushes = _masked_steals = 0; + _arrays_chunked = _array_chunks_processed = 0; +} +#endif // TASKQUEUE_STATS PSPromotionManager::PSPromotionManager() { ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); @@ -189,16 +207,7 @@ _prefetch_queue.clear(); -#if PS_PM_STATS - _total_pushes = 0; - _masked_pushes = 0; - _overflow_pushes = 0; - _max_overflow_length = 0; - _arrays_chunked = 0; - _array_chunks_processed = 0; - _total_steals = 0; - _masked_steals = 0; -#endif // PS_PM_STATS + TASKQUEUE_STATS_ONLY(reset_stats()); } @@ -423,14 +432,9 @@ new_obj->is_objArray() && PSChunkLargeArrays) { // we'll chunk it -#if PS_PM_STATS - ++_arrays_chunked; -#endif // PS_PM_STATS oop* const masked_o = mask_chunked_array_oop(o); push_depth(masked_o); -#if PS_PM_STATS - ++_masked_pushes; -#endif // PS_PM_STATS + TASKQUEUE_STATS_ONLY(++_arrays_chunked; ++_masked_pushes); } else { // we'll just push its contents new_obj->push_contents(this); @@ -494,9 +498,7 @@ assert(old->is_objArray(), "invariant"); assert(old->is_forwarded(), "invariant"); -#if PS_PM_STATS - ++_array_chunks_processed; -#endif // PS_PM_STATS + TASKQUEUE_STATS_ONLY(++_array_chunks_processed); oop const obj = old->forwardee(); @@ -508,9 +510,7 @@ assert(start > 0, "invariant"); arrayOop(old)->set_length(start); push_depth(mask_chunked_array_oop(old)); -#if PS_PM_STATS - ++_masked_pushes; -#endif // PS_PM_STATS + TASKQUEUE_STATS_ONLY(++_masked_pushes); } else { // this is the final chunk for this array start = 0; diff -r 1a1ce2076047 -r a93a9eda13f7 src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp Fri Jul 16 10:09:15 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp Fri Jul 16 21:33:21 2010 -0700 @@ -42,8 +42,6 @@ class PSOldGen; class ParCompactionManager; -#define PS_PM_STATS 0 - class PSPromotionManager : public CHeapObj { friend class PSScavenge; friend class PSRefProcTaskExecutor; @@ -54,22 +52,18 @@ static PSOldGen* _old_gen; static MutableSpace* _young_space; -#if PS_PM_STATS - uint _total_pushes; - uint _masked_pushes; - - uint _overflow_pushes; - uint _max_overflow_length; +#if TASKQUEUE_STATS + size_t _masked_pushes; + size_t _masked_steals; + size_t _arrays_chunked; + size_t _array_chunks_processed; - uint _arrays_chunked; - uint _array_chunks_processed; + void print_taskqueue_stats(uint i) const; + void print_local_stats(uint i) const; + static void print_stats(); - uint _total_steals; - uint _masked_steals; - - void print_stats(uint i); - static void print_stats(); -#endif // PS_PM_STATS + void reset_stats(); +#endif // TASKQUEUE_STATS PSYoungPromotionLAB _young_lab; PSOldPromotionLAB _old_lab; @@ -143,42 +137,12 @@ template void push_depth(T* p) { assert(depth_first(), "pre-condition"); - -#if PS_PM_STATS - ++_total_pushes; - int stack_length = claimed_stack_depth()->overflow_stack()->length(); -#endif // PS_PM_STATS - claimed_stack_depth()->push(p); - -#if PS_PM_STATS - if (claimed_stack_depth()->overflow_stack()->length() != stack_length) { - ++_overflow_pushes; - if ((uint)stack_length + 1 > _max_overflow_length) { - _max_overflow_length = (uint)stack_length + 1; - } - } -#endif // PS_PM_STATS } void push_breadth(oop o) { assert(!depth_first(), "pre-condition"); - -#if PS_PM_STATS - ++_total_pushes; - int stack_length = claimed_stack_breadth()->overflow_stack()->length(); -#endif // PS_PM_STATS - claimed_stack_breadth()->push(o); - -#if PS_PM_STATS - if (claimed_stack_breadth()->overflow_stack()->length() != stack_length) { - ++_overflow_pushes; - if ((uint)stack_length + 1 > _max_overflow_length) { - _max_overflow_length = (uint)stack_length + 1; - } - } -#endif // PS_PM_STATS } protected: @@ -256,12 +220,5 @@ template inline void claim_or_forward_depth(T* p); template inline void claim_or_forward_breadth(T* p); -#if PS_PM_STATS - void increment_steals(oop* p = NULL) { - _total_steals += 1; - if (p != NULL && is_oop_masked(p)) { - _masked_steals += 1; - } - } -#endif // PS_PM_STATS + TASKQUEUE_STATS_ONLY(inline void record_steal(StarTask& p);) }; diff -r 1a1ce2076047 -r a93a9eda13f7 src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp Fri Jul 16 10:09:15 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp Fri Jul 16 21:33:21 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2008, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -124,3 +124,11 @@ } } } + +#if TASKQUEUE_STATS +void PSPromotionManager::record_steal(StarTask& p) { + if (is_oop_masked(p)) { + ++_masked_steals; + } +} +#endif // TASKQUEUE_STATS diff -r 1a1ce2076047 -r a93a9eda13f7 src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp Fri Jul 16 10:09:15 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp Fri Jul 16 21:33:21 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2008, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -148,9 +148,7 @@ while(true) { StarTask p; if (PSPromotionManager::steal_depth(which, &random_seed, p)) { -#if PS_PM_STATS - pm->increment_steals(p); -#endif // PS_PM_STATS + TASKQUEUE_STATS_ONLY(pm->record_steal(p)); pm->process_popped_location_depth(p); pm->drain_stacks_depth(true); } else { @@ -163,9 +161,6 @@ while(true) { oop obj; if (PSPromotionManager::steal_breadth(which, &random_seed, obj)) { -#if PS_PM_STATS - pm->increment_steals(); -#endif // PS_PM_STATS obj->copy_contents(pm); pm->drain_stacks_breadth(true); } else { diff -r 1a1ce2076047 -r a93a9eda13f7 src/share/vm/utilities/globalDefinitions.hpp --- a/src/share/vm/utilities/globalDefinitions.hpp Fri Jul 16 10:09:15 2010 -0700 +++ b/src/share/vm/utilities/globalDefinitions.hpp Fri Jul 16 21:33:21 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -345,6 +345,35 @@ return align_size_up(offset, HeapWordsPerLong); } +// The expected size in bytes of a cache line, used to pad data structures. +#define DEFAULT_CACHE_LINE_SIZE 64 + +// Bytes needed to pad type to avoid cache-line sharing; alignment should be the +// expected cache line size (a power of two). The first addend avoids sharing +// when the start address is not a multiple of alignment; the second maintains +// alignment of starting addresses that happen to be a multiple. +#define PADDING_SIZE(type, alignment) \ + ((alignment) + align_size_up_(sizeof(type), alignment)) + +// Templates to create a subclass padded to avoid cache line sharing. These are +// effective only when applied to derived-most (leaf) classes. + +// When no args are passed to the base ctor. +template +class Padded: public T { +private: + char _pad_buf_[PADDING_SIZE(T, alignment)]; +}; + +// When either 0 or 1 args may be passed to the base ctor. +template +class Padded01: public T { +public: + Padded01(): T() { } + Padded01(Arg1T arg1): T(arg1) { } +private: + char _pad_buf_[PADDING_SIZE(T, alignment)]; +}; //---------------------------------------------------------------------------------------------------- // Utility macros for compilers diff -r 1a1ce2076047 -r a93a9eda13f7 src/share/vm/utilities/taskqueue.cpp --- a/src/share/vm/utilities/taskqueue.cpp Fri Jul 16 10:09:15 2010 -0700 +++ b/src/share/vm/utilities/taskqueue.cpp Fri Jul 16 21:33:21 2010 -0700 @@ -31,6 +31,48 @@ uint ParallelTaskTerminator::_total_peeks = 0; #endif +#if TASKQUEUE_STATS +const char * const TaskQueueStats::_names[last_stat_id] = { + "qpush", "qpop", "qpop-s", "qattempt", "qsteal", "opush", "omax" +}; + +void TaskQueueStats::print_header(unsigned int line, outputStream* const stream, + unsigned int width) +{ + // Use a width w: 1 <= w <= max_width + const unsigned int max_width = 40; + const unsigned int w = MAX2(MIN2(width, max_width), 1U); + + if (line == 0) { // spaces equal in width to the header + const unsigned int hdr_width = w * last_stat_id + last_stat_id - 1; + stream->print("%*s", hdr_width, " "); + } else if (line == 1) { // labels + stream->print("%*s", w, _names[0]); + for (unsigned int i = 1; i < last_stat_id; ++i) { + stream->print(" %*s", w, _names[i]); + } + } else if (line == 2) { // dashed lines + char dashes[max_width + 1]; + memset(dashes, '-', w); + dashes[w] = '\0'; + stream->print("%s", dashes); + for (unsigned int i = 1; i < last_stat_id; ++i) { + stream->print(" %s", dashes); + } + } +} + +void TaskQueueStats::print(outputStream* stream, unsigned int width) const +{ + #define FMT SIZE_FORMAT_W(*) + stream->print(FMT, width, _stats[0]); + for (unsigned int i = 1; i < last_stat_id; ++i) { + stream->print(" " FMT, width, _stats[i]); + } + #undef FMT +} +#endif // TASKQUEUE_STATS + int TaskQueueSetSuper::randomParkAndMiller(int *seed0) { const int a = 16807; const int m = 2147483647; diff -r 1a1ce2076047 -r a93a9eda13f7 src/share/vm/utilities/taskqueue.hpp --- a/src/share/vm/utilities/taskqueue.hpp Fri Jul 16 10:09:15 2010 -0700 +++ b/src/share/vm/utilities/taskqueue.hpp Fri Jul 16 21:33:21 2010 -0700 @@ -22,6 +22,72 @@ * */ +// Simple TaskQueue stats that are collected by default in debug builds. + +#if !defined(TASKQUEUE_STATS) && defined(ASSERT) +#define TASKQUEUE_STATS 1 +#elif !defined(TASKQUEUE_STATS) +#define TASKQUEUE_STATS 0 +#endif + +#if TASKQUEUE_STATS +#define TASKQUEUE_STATS_ONLY(code) code +#else +#define TASKQUEUE_STATS_ONLY(code) +#endif // TASKQUEUE_STATS + +#if TASKQUEUE_STATS +class TaskQueueStats { +public: + enum StatId { + push, // number of taskqueue pushes + pop, // number of taskqueue pops + pop_slow, // subset of taskqueue pops that were done slow-path + steal_attempt, // number of taskqueue steal attempts + steal, // number of taskqueue steals + overflow, // number of overflow pushes + overflow_max_len, // max length of overflow stack + last_stat_id + }; + +public: + inline TaskQueueStats() { reset(); } + + inline void record_push() { ++_stats[push]; } + inline void record_pop() { ++_stats[pop]; } + inline void record_pop_slow() { record_pop(); ++_stats[pop_slow]; } + inline void record_steal(bool success); + inline void record_overflow(size_t new_length); + + inline size_t get(StatId id) const { return _stats[id]; } + inline const size_t* get() const { return _stats; } + + inline void reset(); + + static void print_header(unsigned int line, outputStream* const stream = tty, + unsigned int width = 10); + void print(outputStream* const stream = tty, unsigned int width = 10) const; + +private: + size_t _stats[last_stat_id]; + static const char * const _names[last_stat_id]; +}; + +void TaskQueueStats::record_steal(bool success) { + ++_stats[steal_attempt]; + if (success) ++_stats[steal]; +} + +void TaskQueueStats::record_overflow(size_t new_len) { + ++_stats[overflow]; + if (new_len > _stats[overflow_max_len]) _stats[overflow_max_len] = new_len; +} + +void TaskQueueStats::reset() { + memset(_stats, 0, sizeof(_stats)); +} +#endif // TASKQUEUE_STATS + template class TaskQueueSuper: public CHeapObj { protected: @@ -135,6 +201,8 @@ // Total size of queue. static const uint total_size() { return N; } + + TASKQUEUE_STATS_ONLY(TaskQueueStats stats;) }; template @@ -152,6 +220,7 @@ public: using TaskQueueSuper::max_elems; using TaskQueueSuper::size; + TASKQUEUE_STATS_ONLY(using TaskQueueSuper::stats;) private: // Slow paths for push, pop_local. (pop_global has no fast path.) @@ -224,14 +293,14 @@ // g++ complains if the volatile result of the assignment is unused. const_cast(_elems[localBot] = t); OrderAccess::release_store(&_bottom, increment_index(localBot)); + TASKQUEUE_STATS_ONLY(stats.record_push()); return true; } return false; } template -bool GenericTaskQueue:: -pop_local_slow(uint localBot, Age oldAge) { +bool GenericTaskQueue::pop_local_slow(uint localBot, Age oldAge) { // This queue was observed to contain exactly one element; either this // thread will claim it, or a competing "pop_global". In either case, // the queue will be logically empty afterwards. Create a new Age value @@ -251,6 +320,7 @@ if (tempAge == oldAge) { // We win. assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); + TASKQUEUE_STATS_ONLY(stats.record_pop_slow()); return true; } } @@ -306,6 +376,8 @@ typedef GrowableArray overflow_t; typedef GenericTaskQueue taskqueue_t; + TASKQUEUE_STATS_ONLY(using taskqueue_t::stats;) + OverflowTaskQueue(); ~OverflowTaskQueue(); void initialize(); @@ -356,6 +428,7 @@ { if (!taskqueue_t::push(t)) { overflow_stack()->push(t); + TASKQUEUE_STATS_ONLY(stats.record_overflow(overflow_stack()->length())); } return true; } @@ -424,9 +497,13 @@ template bool GenericTaskQueueSet::steal(uint queue_num, int* seed, E& t) { - for (uint i = 0; i < 2 * _n; i++) - if (steal_best_of_2(queue_num, seed, t)) + for (uint i = 0; i < 2 * _n; i++) { + if (steal_best_of_2(queue_num, seed, t)) { + TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(true)); return true; + } + } + TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(false)); return false; } @@ -574,6 +651,7 @@ // g++ complains if the volatile result of the assignment is unused. const_cast(_elems[localBot] = t); OrderAccess::release_store(&_bottom, increment_index(localBot)); + TASKQUEUE_STATS_ONLY(stats.record_push()); return true; } else { return push_slow(t, dirty_n_elems); @@ -603,6 +681,7 @@ idx_t tp = _age.top(); // XXX if (size(localBot, tp) > 0) { assert(dirty_size(localBot, tp) != N - 1, "sanity"); + TASKQUEUE_STATS_ONLY(stats.record_pop()); return true; } else { // Otherwise, the queue contained exactly one element; we take the slow