# HG changeset patch # User jmasa # Date 1234127881 28800 # Node ID 05c6d52fa7a9882e2d0fe74bec7db5546e1efbe9 # Parent 58054a18d73593f8392afe892f57d8b0b3fc469c 6690928: Use spinning in combination with yields for workstealing termination. Summary: Substitute a spin loop for most calls to yield() to reduce the stress on the system. Reviewed-by: tonyp diff -r 58054a18d735 -r 05c6d52fa7a9 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Feb 06 01:38:50 2009 +0300 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Sun Feb 08 13:18:01 2009 -0800 @@ -949,6 +949,10 @@ GCOverheadReporter::recordSTWEnd(end); g1_policy()->record_full_collection_end(); +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif + gc_epilogue(true); // Abandon concurrent refinement. This must happen last: in the @@ -2647,8 +2651,13 @@ } } - if (mark_in_progress()) + if (mark_in_progress()) { concurrent_mark()->update_g1_committed(); + } + +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif gc_epilogue(false); } diff -r 58054a18d735 -r 05c6d52fa7a9 src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Fri Feb 06 01:38:50 2009 +0300 +++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Sun Feb 08 13:18:01 2009 -0800 @@ -362,6 +362,10 @@ if (PrintHeapAtGC) { Universe::print_heap_after_gc(); } + +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif } bool PSMarkSweep::absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy, diff -r 58054a18d735 -r 05c6d52fa7a9 src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Fri Feb 06 01:38:50 2009 +0300 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Sun Feb 08 13:18:01 2009 -0800 
@@ -2203,6 +2203,10 @@ collection_exit.ticks()); gc_task_manager()->print_task_time_stamps(); } + +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif } bool PSParallelCompact::absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy, diff -r 58054a18d735 -r 05c6d52fa7a9 src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Fri Feb 06 01:38:50 2009 +0300 +++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Sun Feb 08 13:18:01 2009 -0800 @@ -615,6 +615,10 @@ gc_task_manager()->print_task_time_stamps(); } +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif + return !promotion_failure_occurred; } diff -r 58054a18d735 -r 05c6d52fa7a9 src/share/vm/memory/genCollectedHeap.cpp --- a/src/share/vm/memory/genCollectedHeap.cpp Fri Feb 06 01:38:50 2009 +0300 +++ b/src/share/vm/memory/genCollectedHeap.cpp Sun Feb 08 13:18:01 2009 -0800 @@ -610,6 +610,10 @@ Universe::print_heap_after_gc(); } +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif + if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) { tty->print_cr("Stopping after GC #%d", ExitAfterGCNum); vm_exit(-1); diff -r 58054a18d735 -r 05c6d52fa7a9 src/share/vm/oops/cpCacheKlass.cpp --- a/src/share/vm/oops/cpCacheKlass.cpp Fri Feb 06 01:38:50 2009 +0300 +++ b/src/share/vm/oops/cpCacheKlass.cpp Sun Feb 08 13:18:01 2009 -0800 @@ -161,7 +161,7 @@ } bool constantPoolCacheKlass::oop_is_conc_safe(oop obj) const { - assert(obj->is_constantPoolCache(), "must be constMethod oop"); + assert(obj->is_constantPoolCache(), "should be constant pool cache"); return constantPoolCacheOop(obj)->is_conc_safe(); } diff -r 58054a18d735 -r 05c6d52fa7a9 src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Fri Feb 06 01:38:50 2009 +0300 +++ b/src/share/vm/runtime/globals.hpp Sun Feb 08 13:18:01 2009 -0800 @@ -1655,6 +1655,13 @@ 
develop(uintx, WorkStealingYieldsBeforeSleep, 1000, \ "Number of yields before a sleep is done during workstealing") \ \ + develop(uintx, WorkStealingHardSpins, 4096, \ + "Number of iterations in a spin loop between checks on " \ + "time out of hard spin") \ + \ + develop(uintx, WorkStealingSpinToYieldRatio, 10, \ + "Ratio of hard spins to calls to yield") \ + \ product(uintx, PreserveMarkStackSize, 1024, \ "Size for stack used in promotion failure handling") \ \ diff -r 58054a18d735 -r 05c6d52fa7a9 src/share/vm/utilities/taskqueue.cpp --- a/src/share/vm/utilities/taskqueue.cpp Fri Feb 06 01:38:50 2009 +0300 +++ b/src/share/vm/utilities/taskqueue.cpp Sun Feb 08 13:18:01 2009 -0800 @@ -25,6 +25,12 @@ # include "incls/_precompiled.incl" # include "incls/_taskqueue.cpp.incl" +#ifdef TRACESPINNING +uint ParallelTaskTerminator::_total_yields = 0; +uint ParallelTaskTerminator::_total_spins = 0; +uint ParallelTaskTerminator::_total_peeks = 0; +#endif + bool TaskQueueSuper::peek() { return _bottom != _age.top(); } @@ -70,14 +76,61 @@ Atomic::inc(&_offered_termination); uint yield_count = 0; + // Number of hard spin loops done since last yield + uint hard_spin_count = 0; + // Number of iterations in the hard spin loop. + uint hard_spin_limit = WorkStealingHardSpins; + + // If WorkStealingSpinToYieldRatio is 0, every hard spin runs the full + // WorkStealingHardSpins iterations. If it is greater than 0, start + // with a small number of spins and double it with each turn at + // spinning (capped at WorkStealingHardSpins) until the count of hard + // spins exceeds WorkStealingSpinToYieldRatio. Then yield() and restart. + if (WorkStealingSpinToYieldRatio > 0) { + hard_spin_limit = WorkStealingHardSpins >> WorkStealingSpinToYieldRatio; + hard_spin_limit = MAX2(hard_spin_limit, 1U); + } + // Remember the initial spin limit. + uint hard_spin_start = hard_spin_limit; + + // Loop waiting for all threads to offer termination or + // more work. while (true) { + // Are all threads offering termination? 
if (_offered_termination == _n_threads) { - //inner_termination_loop(); return true; } else { + // Look for more work. + // Periodically sleep() instead of yield() to give threads + // waiting on the cores the chance to grab this code if (yield_count <= WorkStealingYieldsBeforeSleep) { + // Do a yield or hardspin. For purposes of deciding whether + // to sleep, count this as a yield. yield_count++; - yield(); + + // Periodically call yield() instead of spinning. + // Once hard_spin_count exceeds WorkStealingSpinToYieldRatio, yield() + // and reset the counts and starting limit. + if (hard_spin_count > WorkStealingSpinToYieldRatio) { + yield(); + hard_spin_count = 0; + hard_spin_limit = hard_spin_start; +#ifdef TRACESPINNING + _total_yields++; +#endif + } else { + // Hard spin this time + // Increase the hard spinning period but only up to a limit. + hard_spin_limit = MIN2(2*hard_spin_limit, + (uint) WorkStealingHardSpins); + for (uint j = 0; j < hard_spin_limit; j++) { + SpinPause(); + } + hard_spin_count++; +#ifdef TRACESPINNING + _total_spins++; +#endif + } } else { if (PrintGCDetails && Verbose) { gclog_or_tty->print_cr("ParallelTaskTerminator::offer_termination() " @@ -92,6 +145,9 @@ sleep(WorkStealingSleepMillis); } +#ifdef TRACESPINNING + _total_peeks++; +#endif if (peek_in_queue_set() || (terminator != NULL && terminator->should_exit_termination())) { Atomic::dec(&_offered_termination); @@ -101,6 +157,17 @@ } } +#ifdef TRACESPINNING +// Print the accumulated yield, spin and peek counters; they are uint, so use %u. +void ParallelTaskTerminator::print_termination_counts() { + gclog_or_tty->print_cr("ParallelTaskTerminator Total yields: %u " + "Total spins: %u Total peeks: %u", + total_yields(), + total_spins(), + total_peeks()); +} +#endif + void ParallelTaskTerminator::reset_for_reuse() { if (_offered_termination != 0) { assert(_offered_termination == _n_threads, diff -r 58054a18d735 -r 05c6d52fa7a9 src/share/vm/utilities/taskqueue.hpp --- a/src/share/vm/utilities/taskqueue.hpp Fri Feb 06 01:38:50 2009 +0300 +++ 
b/src/share/vm/utilities/taskqueue.hpp Sun Feb 08 13:18:01 2009 -0800 @@ -426,12 +426,20 @@ // A class to aid in the termination of a set of parallel tasks using // TaskQueueSet's for work stealing. +#undef TRACESPINNING + class ParallelTaskTerminator: public StackObj { private: int _n_threads; TaskQueueSetSuper* _queue_set; int _offered_termination; +#ifdef TRACESPINNING + static uint _total_yields; + static uint _total_spins; + static uint _total_peeks; +#endif + bool peek_in_queue_set(); protected: virtual void yield(); @@ -462,6 +470,12 @@ // the terminator is finished. void reset_for_reuse(); +#ifdef TRACESPINNING + static uint total_yields() { return _total_yields; } + static uint total_spins() { return _total_spins; } + static uint total_peeks() { return _total_peeks; } + static void print_termination_counts(); +#endif }; #define SIMPLE_STACK 0