comparison src/share/vm/runtime/thread.cpp @ 1878:fa83ab460c54

6988353: refactor contended sync subsystem
Summary: reduce complexity by factoring synchronizer.cpp
Reviewed-by: dholmes, never, coleenp
author acorn
date Fri, 22 Oct 2010 15:59:34 -0400
parents 75b0735b4d04
children 5caa30ea147b
--- src/share/vm/runtime/thread.cpp	(parent 1874:75ab0162aa84)
+++ src/share/vm/runtime/thread.cpp	(this revision 1878:fa83ab460c54)
@@ -2993,12 +2993,12 @@
 
   // Enable guard page *after* os::create_main_thread(), otherwise it would
   // crash Linux VM, see notes in os_linux.cpp.
   main_thread->create_stack_guard_pages();
 
-  // Initialize Java-Leve synchronization subsystem
-  ObjectSynchronizer::Initialize() ;
+  // Initialize Java-Level synchronization subsystem
+  ObjectMonitor::Initialize() ;
 
   // Initialize global modules
   jint status = init_globals();
   if (status != JNI_OK) {
     delete main_thread;
@@ -3963,218 +3963,275 @@
     current->print_on_error(st, buf, buflen);
     st->cr();
   }
 }
 
-
-// Lifecycle management for TSM ParkEvents.
-// ParkEvents are type-stable (TSM).
-// In our particular implementation they happen to be immortal.
-//
-// We manage concurrency on the FreeList with a CAS-based
-// detach-modify-reattach idiom that avoids the ABA problems
-// that would otherwise be present in a simple CAS-based
-// push-pop implementation. (push-one and pop-all)
-//
-// Caveat: Allocate() and Release() may be called from threads
-// other than the thread associated with the Event!
-// If we need to call Allocate() when running as the thread in
-// question then look for the PD calls to initialize native TLS.
-// Native TLS (Win32/Linux/Solaris) can only be initialized or
-// accessed by the associated thread.
-// See also pd_initialize().
-//
-// Note that we could defer associating a ParkEvent with a thread
-// until the 1st time the thread calls park().  unpark() calls to
-// an unprovisioned thread would be ignored.  The first park() call
-// for a thread would allocate and associate a ParkEvent and return
-// immediately.
-
-volatile int ParkEvent::ListLock = 0 ;
-ParkEvent * volatile ParkEvent::FreeList = NULL ;
-
-ParkEvent * ParkEvent::Allocate (Thread * t) {
-  // In rare cases -- JVM_RawMonitor* operations -- we can find t == null.
-  ParkEvent * ev ;
-
-  // Start by trying to recycle an existing but unassociated
-  // ParkEvent from the global free list.
-  for (;;) {
-    ev = FreeList ;
-    if (ev == NULL) break ;
-    // 1: Detach - sequester or privatize the list
-    // Tantamount to ev = Swap (&FreeList, NULL)
-    if (Atomic::cmpxchg_ptr (NULL, &FreeList, ev) != ev) {
-      continue ;
-    }
-
-    // We've detached the list.  The list in-hand is now
-    // local to this thread.  This thread can operate on the
-    // list without risk of interference from other threads.
-    // 2: Extract -- pop the 1st element from the list.
-    ParkEvent * List = ev->FreeNext ;
-    if (List == NULL) break ;
+// Internal SpinLock and Mutex
+// Based on ParkEvent
+
+// Ad-hoc mutual exclusion primitives: SpinLock and Mux
+//
+// We employ SpinLocks _only for low-contention, fixed-length
+// short-duration critical sections where we're concerned
+// about native mutex_t or HotSpot Mutex:: latency.
+// The mux construct provides a spin-then-block mutual exclusion
+// mechanism.
+//
+// Testing has shown that contention on the ListLock guarding gFreeList
+// is common.  If we implement ListLock as a simple SpinLock it's common
+// for the JVM to devolve to yielding with little progress.  This is true
+// despite the fact that the critical sections protected by ListLock are
+// extremely short.
+//
+// TODO-FIXME: ListLock should be of type SpinLock.
+// We should make this a 1st-class type, integrated into the lock
+// hierarchy as leaf-locks.  Critically, the SpinLock structure
+// should have sufficient padding to avoid false-sharing and excessive
+// cache-coherency traffic.
+
+
+typedef volatile int SpinLockT ;
+
+void Thread::SpinAcquire (volatile int * adr, const char * LockName) {
+  if (Atomic::cmpxchg (1, adr, 0) == 0) {
+    return ;   // normal fast-path return
+  }
+
+  // Slow-path : We've encountered contention -- Spin/Yield/Block strategy.
+  TEVENT (SpinAcquire - ctx) ;
+  int ctr = 0 ;
+  int Yields = 0 ;
+  for (;;) {
+    while (*adr != 0) {
+      ++ctr ;
+      if ((ctr & 0xFFF) == 0 || !os::is_MP()) {
+        if (Yields > 5) {
+          // Consider using a simple NakedSleep() instead.
+          // Then SpinAcquire could be called by non-JVM threads
+          Thread::current()->_ParkEvent->park(1) ;
+        } else {
+          os::NakedYield() ;
+          ++Yields ;
+        }
+      } else {
+        SpinPause() ;
+      }
+    }
+    if (Atomic::cmpxchg (1, adr, 0) == 0) return ;
+  }
+}
+
+void Thread::SpinRelease (volatile int * adr) {
+  assert (*adr != 0, "invariant") ;
+  OrderAccess::fence() ;   // guarantee at least release consistency.
+  // Roach-motel semantics.
+  // It's safe if subsequent LDs and STs float "up" into the critical section,
+  // but prior LDs and STs within the critical section can't be allowed
+  // to reorder or float past the ST that releases the lock.
+  *adr = 0 ;
+}
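The TODO-FIXME above asks for a first-class, padded SpinLock type. A minimal sketch of what that could look like, layered on the SpinAcquire/SpinRelease primitives added in this hunk; the class name and the 64-byte cache-line assumption are illustrative only and not part of this changeset.

    // Editorial sketch only -- not part of this changeset.
    // Assumes a 64-byte cache line; the name PaddedSpinLock is hypothetical.
    class PaddedSpinLock {
     private:
      volatile int _LockWord ;                  // 0 => free, 1 => held
      char         _pad [64 - sizeof(int)] ;    // keep the hot word on its own cache line
     public:
      PaddedSpinLock () : _LockWord (0) {}
      void lock   (const char * name) { Thread::SpinAcquire (&_LockWord, name) ; }
      void unlock ()                  { Thread::SpinRelease (&_LockWord) ; }
    } ;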
+
+// muxAcquire and muxRelease:
+//
+// *  muxAcquire and muxRelease support a single-word lock-word construct.
+//    The LSB of the word is set IFF the lock is held.
+//    The remainder of the word points to the head of a singly-linked list
+//    of threads blocked on the lock.
+//
+// *  The current implementation of muxAcquire-muxRelease uses its own
+//    dedicated Thread._MuxEvent instance.  If we're interested in
+//    minimizing the peak number of extant ParkEvent instances then
+//    we could eliminate _MuxEvent and "borrow" _ParkEvent as long
+//    as certain invariants were satisfied.  Specifically, care would need
+//    to be taken with regards to consuming unpark() "permits".
+//    A safe rule of thumb is that a thread would never call muxAcquire()
+//    if it's enqueued (cxq, EntryList, WaitList, etc) and will subsequently
+//    park().  Otherwise the _ParkEvent park() operation in muxAcquire() could
+//    consume an unpark() permit intended for monitorenter, for instance.
+//    One way around this would be to widen the restricted-range semaphore
+//    implemented in park().  Another alternative would be to provide
+//    multiple instances of the PlatformEvent() for each thread.  One
+//    instance would be dedicated to muxAcquire-muxRelease, for instance.
+//
+// *  Usage:
+//    -- Only as leaf locks
+//    -- for short-term locking only as muxAcquire does not perform
+//       thread state transitions.
+//
+// Alternatives:
+// *  We could implement muxAcquire and muxRelease with MCS or CLH locks
+//    but with parking or spin-then-park instead of pure spinning.
+// *  Use Taura-Oyama-Yonezawa locks.
+// *  It's possible to construct a 1-0 lock if we encode the lockword as
+//    (List,LockByte).  Acquire will CAS the full lockword while Release
+//    will STB 0 into the LockByte.  The 1-0 scheme admits stranding, so
+//    acquiring threads use timers (ParkTimed) to detect and recover from
+//    the stranding window.  Thread/Node structures must be aligned on 256-byte
+//    boundaries by using placement-new.
+// *  Augment MCS with advisory back-link fields maintained with CAS().
+//    Pictorially:  LockWord -> T1 <-> T2 <-> T3 <-> ... <-> Tn <-> Owner.
+//    The validity of the backlinks must be ratified before we trust the value.
+//    If the backlinks are invalid the exiting thread must back-track through
+//    the forward links, which are always trustworthy.
+// *  Add a successor indication.  The LockWord is currently encoded as
+//    (List, LOCKBIT:1).  We could also add a SUCCBIT or an explicit _succ variable
+//    to provide the usual futile-wakeup optimization.
+//    See RTStt for details.
+// *  Consider schedctl.sc_nopreempt to cover the critical section.
+//
+
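Before the implementation below, the lock-word layout described in the first bullet can be restated as a few one-line helpers. This is an editorial sketch with hypothetical names, not code from the changeset; the real muxAcquire/muxRelease manipulate the word directly.

    // Editorial sketch of the (List, LOCKBIT) encoding -- hypothetical helpers.
    static inline bool        mux_is_locked (intptr_t w) { return (w & LOCKBIT) != 0 ; }
    static inline ParkEvent * mux_waiters   (intptr_t w) { return (ParkEvent *) (w & ~LOCKBIT) ; }
    // A contending thread links itself ahead of the current head and CASes
    // (Self|LOCKBIT) over w; ParkEvent addresses have their low bits clear,
    // so the pointer and the lock bit can share one word.
    static inline intptr_t    mux_enqueued  (ParkEvent * Self, intptr_t w) {
      Self->ListNext = mux_waiters (w) ;
      return intptr_t(Self) | LOCKBIT ;         // new lock-word value to install
    }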
+
+typedef volatile intptr_t MutexT ;      // Mux Lock-word
+enum MuxBits { LOCKBIT = 1 } ;
+
+void Thread::muxAcquire (volatile intptr_t * Lock, const char * LockName) {
+  intptr_t w = Atomic::cmpxchg_ptr (LOCKBIT, Lock, 0) ;
+  if (w == 0) return ;
+  if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+    return ;
+  }
+
+  TEVENT (muxAcquire - Contention) ;
+  ParkEvent * const Self = Thread::current()->_MuxEvent ;
+  assert ((intptr_t(Self) & LOCKBIT) == 0, "invariant") ;
+  for (;;) {
+    int its = (os::is_MP() ? 100 : 0) + 1 ;
+
+    // Optional spin phase: spin-then-park strategy
+    while (--its >= 0) {
+      w = *Lock ;
+      if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+        return ;
+      }
+    }
+
+    Self->reset() ;
+    Self->OnList = intptr_t(Lock) ;
+    // The following fence() isn't _strictly necessary as the subsequent
+    // CAS() both serializes execution and ratifies the fetched *Lock value.
+    OrderAccess::fence();
+    for (;;) {
+      w = *Lock ;
+      if ((w & LOCKBIT) == 0) {
+        if (Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+          Self->OnList = 0 ;   // hygiene - allows stronger asserts
+          return ;
+        }
+        continue ;   // Interference -- *Lock changed -- Just retry
+      }
+      assert (w & LOCKBIT, "invariant") ;
+      Self->ListNext = (ParkEvent *) (w & ~LOCKBIT );
+      if (Atomic::cmpxchg_ptr (intptr_t(Self)|LOCKBIT, Lock, w) == w) break ;
+    }
+
+    while (Self->OnList != 0) {
+      Self->park() ;
+    }
+  }
+}
+
+void Thread::muxAcquireW (volatile intptr_t * Lock, ParkEvent * ev) {
+  intptr_t w = Atomic::cmpxchg_ptr (LOCKBIT, Lock, 0) ;
+  if (w == 0) return ;
+  if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+    return ;
+  }
+
+  TEVENT (muxAcquire - Contention) ;
+  ParkEvent * ReleaseAfter = NULL ;
+  if (ev == NULL) {
+    ev = ReleaseAfter = ParkEvent::Allocate (NULL) ;
+  }
+  assert ((intptr_t(ev) & LOCKBIT) == 0, "invariant") ;
+  for (;;) {
+    guarantee (ev->OnList == 0, "invariant") ;
+    int its = (os::is_MP() ? 100 : 0) + 1 ;
+
+    // Optional spin phase: spin-then-park strategy
+    while (--its >= 0) {
+      w = *Lock ;
+      if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+        if (ReleaseAfter != NULL) {
+          ParkEvent::Release (ReleaseAfter) ;
+        }
+        return ;
+      }
+    }
+
+    ev->reset() ;
+    ev->OnList = intptr_t(Lock) ;
+    // The following fence() isn't _strictly necessary as the subsequent
+    // CAS() both serializes execution and ratifies the fetched *Lock value.
+    OrderAccess::fence();
-    for (;;) {
-      // 3: Try to reattach the residual list
-      guarantee (List != NULL, "invariant") ;
-      ParkEvent * Arv = (ParkEvent *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ;
-      if (Arv == NULL) break ;
-
-      // New nodes arrived.  Try to detach the recent arrivals.
-      if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) {
-        continue ;
-      }
-      guarantee (Arv != NULL, "invariant") ;
-      // 4: Merge Arv into List
-      ParkEvent * Tail = List ;
-      while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ;
-      Tail->FreeNext = Arv ;
-    }
-    break ;
-  }
-
-  if (ev != NULL) {
-    guarantee (ev->AssociatedWith == NULL, "invariant") ;
-  } else {
-    // Do this the hard way -- materialize a new ParkEvent.
-    // In rare cases an allocating thread might detach a long list --
-    // installing null into FreeList -- and then stall or be obstructed.
-    // A 2nd thread calling Allocate() would see FreeList == null.
-    // The list held privately by the 1st thread is unavailable to the 2nd thread.
-    // In that case the 2nd thread would have to materialize a new ParkEvent,
-    // even though free ParkEvents existed in the system.  In this case we end up
-    // with more ParkEvents in circulation than we need, but the race is
-    // rare and the outcome is benign.  Ideally, the # of extant ParkEvents
-    // is equal to the maximum # of threads that existed at any one time.
-    // Because of the race mentioned above, segments of the freelist
-    // can be transiently inaccessible.  At worst we may end up with the
-    // # of ParkEvents in circulation slightly above the ideal.
-    // Note that if we didn't have the TSM/immortal constraint, then
-    // when reattaching, above, we could trim the list.
-    ev = new ParkEvent () ;
-    guarantee ((intptr_t(ev) & 0xFF) == 0, "invariant") ;
-  }
-  ev->reset() ;                 // courtesy to caller
-  ev->AssociatedWith = t ;      // Associate ev with t
-  ev->FreeNext = NULL ;
-  return ev ;
-}
-
-void ParkEvent::Release (ParkEvent * ev) {
-  if (ev == NULL) return ;
-  guarantee (ev->FreeNext == NULL, "invariant") ;
-  ev->AssociatedWith = NULL ;
-  for (;;) {
-    // Push ev onto FreeList
-    // The mechanism is "half" lock-free.
-    ParkEvent * List = FreeList ;
-    ev->FreeNext = List ;
-    if (Atomic::cmpxchg_ptr (ev, &FreeList, List) == List) break ;
-  }
-}
-
-// Override operator new and delete so we can ensure that the
-// least significant byte of ParkEvent addresses is 0.
-// Beware that excessive address alignment is undesirable
-// as it can result in D$ index usage imbalance as
-// well as bank access imbalance on Niagara-like platforms,
-// although Niagara's hash function should help.
-
-void * ParkEvent::operator new (size_t sz) {
-  return (void *) ((intptr_t (CHeapObj::operator new (sz + 256)) + 256) & -256) ;
-}
+    for (;;) {
+      w = *Lock ;
+      if ((w & LOCKBIT) == 0) {
+        if (Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+          ev->OnList = 0 ;
+          // We call ::Release while holding the outer lock, thus
+          // artificially lengthening the critical section.
+          // Consider deferring the ::Release() until the subsequent unlock(),
+          // after we've dropped the outer lock.
+          if (ReleaseAfter != NULL) {
+            ParkEvent::Release (ReleaseAfter) ;
+          }
+          return ;
+        }
+        continue ;   // Interference -- *Lock changed -- Just retry
+      }
+      assert (w & LOCKBIT, "invariant") ;
+      ev->ListNext = (ParkEvent *) (w & ~LOCKBIT );
+      if (Atomic::cmpxchg_ptr (intptr_t(ev)|LOCKBIT, Lock, w) == w) break ;
+    }
+
+    while (ev->OnList != 0) {
+      ev->park() ;
+    }
+  }
+}
+
+// Release() must extract a successor from the list and then wake that thread.
+// It can "pop" the front of the list or use a detach-modify-reattach (DMR) scheme
+// similar to that used by ParkEvent::Allocate() and ::Release().  DMR-based
+// Release() would :
+// (A) CAS() or swap() null to *Lock, releasing the lock and detaching the list.
+// (B) Extract a successor from the private list "in-hand"
+// (C) attempt to CAS() the residual back into *Lock over null.
+//     If there were any newly arrived threads, the CAS() would fail.
+//     In that case Release() would detach the RATs, re-merge the list in-hand
+//     with the RATs and repeat as needed.  Alternately, Release() might
+//     detach and extract a successor, but then pass the residual list to the wakee.
+//     The wakee would be responsible for reattaching and remerging before it
+//     competed for the lock.
+//
+// Both "pop" and DMR are immune from ABA corruption -- there can be
+// multiple concurrent pushers, but only one popper or detacher.
+// This implementation pops from the head of the list.  This is unfair,
+// but tends to provide excellent throughput as hot threads remain hot.
+// (We wake recently run threads first).
+
+void Thread::muxRelease (volatile intptr_t * Lock) {
+  for (;;) {
+    const intptr_t w = Atomic::cmpxchg_ptr (0, Lock, LOCKBIT) ;
+    assert (w & LOCKBIT, "invariant") ;
+    if (w == LOCKBIT) return ;
+    ParkEvent * List = (ParkEvent *) (w & ~LOCKBIT) ;
+    assert (List != NULL, "invariant") ;
+    assert (List->OnList == intptr_t(Lock), "invariant") ;
+    ParkEvent * nxt = List->ListNext ;
+
+    // The following CAS() releases the lock and pops the head element.
+    if (Atomic::cmpxchg_ptr (intptr_t(nxt), Lock, w) != w) {
+      continue ;
+    }
+    List->OnList = 0 ;
+    OrderAccess::fence() ;
+    List->unpark () ;
+    return ;
+  }
+}
 
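For reference, a typical call site following the usage rules stated above (leaf lock only, short critical section, no thread-state transitions) would look roughly like this. The lock name and function are made up for illustration and are not part of the changeset.

    // Editorial usage sketch -- hypothetical lock word and critical section.
    static volatile intptr_t SketchLock = 0 ;   // 0 => unlocked mux lock-word

    void update_shared_structure_briefly () {
      Thread::muxAcquire (&SketchLock, "SketchLock") ;
      // ... short, leaf-level critical section: no blocking, no attempts to
      // acquire other locks, no Java thread-state transitions ...
      Thread::muxRelease (&SketchLock) ;
    }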
-void ParkEvent::operator delete (void * a) {
-  // ParkEvents are type-stable and immortal ...
-  ShouldNotReachHere();
-}
-
-
-// 6399321 As a temporary measure we copied & modified the ParkEvent::
-// allocate() and release() code for use by Parkers.  The Parker:: forms
-// will eventually be removed as we consolide and shift over to ParkEvents
-// for both builtin synchronization and JSR166 operations.
-
-volatile int Parker::ListLock = 0 ;
-Parker * volatile Parker::FreeList = NULL ;
-
-Parker * Parker::Allocate (JavaThread * t) {
-  guarantee (t != NULL, "invariant") ;
-  Parker * p ;
-
-  // Start by trying to recycle an existing but unassociated
-  // Parker from the global free list.
-  for (;;) {
-    p = FreeList ;
-    if (p == NULL) break ;
-    // 1: Detach
-    // Tantamount to p = Swap (&FreeList, NULL)
-    if (Atomic::cmpxchg_ptr (NULL, &FreeList, p) != p) {
-      continue ;
-    }
-
-    // We've detached the list.  The list in-hand is now
-    // local to this thread.  This thread can operate on the
-    // list without risk of interference from other threads.
-    // 2: Extract -- pop the 1st element from the list.
-    Parker * List = p->FreeNext ;
-    if (List == NULL) break ;
-    for (;;) {
-      // 3: Try to reattach the residual list
-      guarantee (List != NULL, "invariant") ;
-      Parker * Arv = (Parker *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ;
-      if (Arv == NULL) break ;
-
-      // New nodes arrived.  Try to detach the recent arrivals.
-      if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) {
-        continue ;
-      }
-      guarantee (Arv != NULL, "invariant") ;
-      // 4: Merge Arv into List
-      Parker * Tail = List ;
-      while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ;
-      Tail->FreeNext = Arv ;
-    }
-    break ;
-  }
-
-  if (p != NULL) {
-    guarantee (p->AssociatedWith == NULL, "invariant") ;
-  } else {
-    // Do this the hard way -- materialize a new Parker..
-    // In rare cases an allocating thread might detach
-    // a long list -- installing null into FreeList --and
-    // then stall.  Another thread calling Allocate() would see
-    // FreeList == null and then invoke the ctor.  In this case we
-    // end up with more Parkers in circulation than we need, but
-    // the race is rare and the outcome is benign.
-    // Ideally, the # of extant Parkers is equal to the
-    // maximum # of threads that existed at any one time.
-    // Because of the race mentioned above, segments of the
-    // freelist can be transiently inaccessible.  At worst
-    // we may end up with the # of Parkers in circulation
-    // slightly above the ideal.
-    p = new Parker() ;
-  }
-  p->AssociatedWith = t ;      // Associate p with t
-  p->FreeNext = NULL ;
-  return p ;
-}
-
-
-void Parker::Release (Parker * p) {
-  if (p == NULL) return ;
-  guarantee (p->AssociatedWith != NULL, "invariant") ;
-  guarantee (p->FreeNext == NULL, "invariant") ;
-  p->AssociatedWith = NULL ;
-  for (;;) {
-    // Push p onto FreeList
-    Parker * List = FreeList ;
-    p->FreeNext = List ;
-    if (Atomic::cmpxchg_ptr (p, &FreeList, List) == List) break ;
-  }
-}
 
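The Allocate()/Release() bodies removed above are all instances of the CAS-based detach-modify-reattach idiom described in the old comments: Release() pushes one node, while Allocate() privatizes the whole list, takes the head, and re-merges the residue with any nodes that arrived in the meantime. A condensed, self-contained sketch of that idiom follows; the Node and gFreeListHead names are hypothetical and this is not code from the changeset.

    // Editorial sketch of detach-modify-reattach over a lock-free free list.
    struct Node { Node * FreeNext ; } ;
    static Node * volatile gFreeListHead = NULL ;

    static void dmr_push (Node * n) {           // mirrors ParkEvent/Parker ::Release()
      for (;;) {
        Node * head = gFreeListHead ;
        n->FreeNext = head ;
        if (Atomic::cmpxchg_ptr (n, &gFreeListHead, head) == head) return ;
      }
    }

    static Node * dmr_pop () {                  // mirrors ParkEvent/Parker ::Allocate()
      for (;;) {
        Node * head = gFreeListHead ;
        if (head == NULL) return NULL ;
        // 1: Detach -- privatize the entire list so only this thread mutates it.
        if (Atomic::cmpxchg_ptr (NULL, &gFreeListHead, head) != head) continue ;
        // 2: Extract -- pop the first element; the residue is reattached below.
        Node * rest = head->FreeNext ;
        while (rest != NULL) {
          // 3: Reattach the residue; if new nodes arrived, detach and merge them.
          Node * arrivals = (Node *) Atomic::cmpxchg_ptr (rest, &gFreeListHead, NULL) ;
          if (arrivals == NULL) break ;
          if (Atomic::cmpxchg_ptr (NULL, &gFreeListHead, arrivals) != arrivals) continue ;
          Node * tail = rest ;
          while (tail->FreeNext != NULL) tail = tail->FreeNext ;
          tail->FreeNext = arrivals ;           // 4: Merge arrivals into the residue
        }
        return head ;                           // only one detacher => no ABA hazard
      }
    }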
 void Threads::verify() {
   ALL_JAVA_THREADS(p) {
     p->verify();
   }