comparison src/share/vm/runtime/thread.cpp @ 1878:fa83ab460c54

6988353: refactor contended sync subsystem
Summary: reduce complexity by factoring synchronizer.cpp
Reviewed-by: dholmes, never, coleenp
author acorn
date Fri, 22 Oct 2010 15:59:34 -0400
parents 75b0735b4d04
children 5caa30ea147b
--- src/share/vm/runtime/thread.cpp	(parent 1874:75ab0162aa84)
+++ src/share/vm/runtime/thread.cpp	(this revision 1878:fa83ab460c54)
@@ -2993,12 +2993,12 @@
 
   // Enable guard page *after* os::create_main_thread(), otherwise it would
   // crash Linux VM, see notes in os_linux.cpp.
   main_thread->create_stack_guard_pages();
 
-  // Initialize Java-Leve synchronization subsystem
-  ObjectSynchronizer::Initialize() ;
+  // Initialize Java-Level synchronization subsystem
+  ObjectMonitor::Initialize() ;
 
   // Initialize global modules
   jint status = init_globals();
   if (status != JNI_OK) {
     delete main_thread;
@@ -3963,218 +3963,275 @@
     current->print_on_error(st, buf, buflen);
     st->cr();
   }
 }
 
-
-// Lifecycle management for TSM ParkEvents.
-// ParkEvents are type-stable (TSM).
-// In our particular implementation they happen to be immortal.
-//
-// We manage concurrency on the FreeList with a CAS-based
-// detach-modify-reattach idiom that avoids the ABA problems
-// that would otherwise be present in a simple CAS-based
-// push-pop implementation. (push-one and pop-all)
-//
-// Caveat: Allocate() and Release() may be called from threads
-// other than the thread associated with the Event!
-// If we need to call Allocate() when running as the thread in
-// question then look for the PD calls to initialize native TLS.
-// Native TLS (Win32/Linux/Solaris) can only be initialized or
-// accessed by the associated thread.
-// See also pd_initialize().
-//
-// Note that we could defer associating a ParkEvent with a thread
-// until the 1st time the thread calls park().  unpark() calls to
-// an unprovisioned thread would be ignored.  The first park() call
-// for a thread would allocate and associate a ParkEvent and return
-// immediately.
-
-volatile int ParkEvent::ListLock = 0 ;
-ParkEvent * volatile ParkEvent::FreeList = NULL ;
-
-ParkEvent * ParkEvent::Allocate (Thread * t) {
-  // In rare cases -- JVM_RawMonitor* operations -- we can find t == null.
-  ParkEvent * ev ;
-
-  // Start by trying to recycle an existing but unassociated
-  // ParkEvent from the global free list.
-  for (;;) {
-    ev = FreeList ;
-    if (ev == NULL) break ;
-    // 1: Detach - sequester or privatize the list
-    // Tantamount to ev = Swap (&FreeList, NULL)
-    if (Atomic::cmpxchg_ptr (NULL, &FreeList, ev) != ev) {
-      continue ;
-    }
-
-    // We've detached the list.  The list in-hand is now
-    // local to this thread.  This thread can operate on the
-    // list without risk of interference from other threads.
-    // 2: Extract -- pop the 1st element from the list.
-    ParkEvent * List = ev->FreeNext ;
-    if (List == NULL) break ;
+// Internal SpinLock and Mutex
+// Based on ParkEvent
+
+// Ad-hoc mutual exclusion primitives: SpinLock and Mux
+//
+// We employ SpinLocks _only for low-contention, fixed-length
+// short-duration critical sections where we're concerned
+// about native mutex_t or HotSpot Mutex:: latency.
+// The mux construct provides a spin-then-block mutual exclusion
+// mechanism.
+//
+// Testing has shown that contention on the ListLock guarding gFreeList
+// is common.  If we implement ListLock as a simple SpinLock it's common
+// for the JVM to devolve to yielding with little progress.  This is true
+// despite the fact that the critical sections protected by ListLock are
+// extremely short.
+//
+// TODO-FIXME: ListLock should be of type SpinLock.
+// We should make this a 1st-class type, integrated into the lock
+// hierarchy as leaf-locks.  Critically, the SpinLock structure
+// should have sufficient padding to avoid false-sharing and excessive
+// cache-coherency traffic.
+
+
+typedef volatile int SpinLockT ;
+
+void Thread::SpinAcquire (volatile int * adr, const char * LockName) {
+  if (Atomic::cmpxchg (1, adr, 0) == 0) {
+    return ;   // normal fast-path return
+  }
+
+  // Slow-path : We've encountered contention -- Spin/Yield/Block strategy.
+  TEVENT (SpinAcquire - ctx) ;
+  int ctr = 0 ;
+  int Yields = 0 ;
+  for (;;) {
+    while (*adr != 0) {
+      ++ctr ;
+      if ((ctr & 0xFFF) == 0 || !os::is_MP()) {
+        if (Yields > 5) {
+          // Consider using a simple NakedSleep() instead.
+          // Then SpinAcquire could be called by non-JVM threads
+          Thread::current()->_ParkEvent->park(1) ;
+        } else {
+          os::NakedYield() ;
+          ++Yields ;
+        }
+      } else {
+        SpinPause() ;
+      }
+    }
+    if (Atomic::cmpxchg (1, adr, 0) == 0) return ;
+  }
+}
+
+void Thread::SpinRelease (volatile int * adr) {
+  assert (*adr != 0, "invariant") ;
+  OrderAccess::fence() ;   // guarantee at least release consistency.
+  // Roach-motel semantics.
+  // It's safe if subsequent LDs and STs float "up" into the critical section,
+  // but prior LDs and STs within the critical section can't be allowed
+  // to reorder or float past the ST that releases the lock.
+  *adr = 0 ;
+}
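The TODO-FIXME above asks for a first-class, padded SpinLock type. A minimal sketch of what that could look like, layered on the SpinAcquire/SpinRelease primitives added in this hunk; the class name and the 64-byte cache-line assumption are illustrative only and not part of this changeset.

    // Editorial sketch only -- not part of this changeset.
    // Assumes a 64-byte cache line; the name PaddedSpinLock is hypothetical.
    class PaddedSpinLock {
     private:
      volatile int _LockWord ;                  // 0 => free, 1 => held
      char         _pad [64 - sizeof(int)] ;    // keep the hot word on its own cache line
     public:
      PaddedSpinLock () : _LockWord (0) {}
      void lock   (const char * name) { Thread::SpinAcquire (&_LockWord, name) ; }
      void unlock ()                  { Thread::SpinRelease (&_LockWord) ; }
    } ;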
+
+// muxAcquire and muxRelease:
+//
+// *  muxAcquire and muxRelease support a single-word lock-word construct.
+//    The LSB of the word is set IFF the lock is held.
+//    The remainder of the word points to the head of a singly-linked list
+//    of threads blocked on the lock.
+//
+// *  The current implementation of muxAcquire-muxRelease uses its own
+//    dedicated Thread._MuxEvent instance.  If we're interested in
+//    minimizing the peak number of extant ParkEvent instances then
+//    we could eliminate _MuxEvent and "borrow" _ParkEvent as long
+//    as certain invariants were satisfied.  Specifically, care would need
+//    to be taken with regards to consuming unpark() "permits".
+//    A safe rule of thumb is that a thread would never call muxAcquire()
+//    if it's enqueued (cxq, EntryList, WaitList, etc) and will subsequently
+//    park().  Otherwise the _ParkEvent park() operation in muxAcquire() could
+//    consume an unpark() permit intended for monitorenter, for instance.
+//    One way around this would be to widen the restricted-range semaphore
+//    implemented in park().  Another alternative would be to provide
+//    multiple instances of the PlatformEvent() for each thread.  One
+//    instance would be dedicated to muxAcquire-muxRelease, for instance.
+//
+// *  Usage:
+//    -- Only as leaf locks
+//    -- for short-term locking only as muxAcquire does not perform
+//       thread state transitions.
+//
+// Alternatives:
+// *  We could implement muxAcquire and muxRelease with MCS or CLH locks
+//    but with parking or spin-then-park instead of pure spinning.
+// *  Use Taura-Oyama-Yonezawa locks.
+// *  It's possible to construct a 1-0 lock if we encode the lockword as
+//    (List,LockByte).  Acquire will CAS the full lockword while Release
+//    will STB 0 into the LockByte.  The 1-0 scheme admits stranding, so
+//    acquiring threads use timers (ParkTimed) to detect and recover from
+//    the stranding window.  Thread/Node structures must be aligned on 256-byte
+//    boundaries by using placement-new.
+// *  Augment MCS with advisory back-link fields maintained with CAS().
+//    Pictorially:  LockWord -> T1 <-> T2 <-> T3 <-> ... <-> Tn <-> Owner.
+//    The validity of the backlinks must be ratified before we trust the value.
+//    If the backlinks are invalid the exiting thread must back-track through
+//    the forward links, which are always trustworthy.
+// *  Add a successor indication.  The LockWord is currently encoded as
+//    (List, LOCKBIT:1).  We could also add a SUCCBIT or an explicit _succ variable
+//    to provide the usual futile-wakeup optimization.
+//    See RTStt for details.
+// *  Consider schedctl.sc_nopreempt to cover the critical section.
+//
+
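Before the implementation below, the lock-word layout described in the first bullet can be restated as a few one-line helpers. This is an editorial sketch with hypothetical names, not code from the changeset; the real muxAcquire/muxRelease manipulate the word directly.

    // Editorial sketch of the (List, LOCKBIT) encoding -- hypothetical helpers.
    static inline bool        mux_is_locked (intptr_t w) { return (w & LOCKBIT) != 0 ; }
    static inline ParkEvent * mux_waiters   (intptr_t w) { return (ParkEvent *) (w & ~LOCKBIT) ; }
    // A contending thread links itself ahead of the current head and CASes
    // (Self|LOCKBIT) over w; ParkEvent addresses have their low bits clear,
    // so the pointer and the lock bit can share one word.
    static inline intptr_t    mux_enqueued  (ParkEvent * Self, intptr_t w) {
      Self->ListNext = mux_waiters (w) ;
      return intptr_t(Self) | LOCKBIT ;         // new lock-word value to install
    }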
+
+typedef volatile intptr_t MutexT ;      // Mux Lock-word
+enum MuxBits { LOCKBIT = 1 } ;
+
+void Thread::muxAcquire (volatile intptr_t * Lock, const char * LockName) {
+  intptr_t w = Atomic::cmpxchg_ptr (LOCKBIT, Lock, 0) ;
+  if (w == 0) return ;
+  if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+    return ;
+  }
+
+  TEVENT (muxAcquire - Contention) ;
+  ParkEvent * const Self = Thread::current()->_MuxEvent ;
+  assert ((intptr_t(Self) & LOCKBIT) == 0, "invariant") ;
+  for (;;) {
+    int its = (os::is_MP() ? 100 : 0) + 1 ;
+
+    // Optional spin phase: spin-then-park strategy
+    while (--its >= 0) {
+      w = *Lock ;
+      if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+        return ;
+      }
+    }
+
+    Self->reset() ;
+    Self->OnList = intptr_t(Lock) ;
+    // The following fence() isn't _strictly necessary as the subsequent
+    // CAS() both serializes execution and ratifies the fetched *Lock value.
+    OrderAccess::fence();
+    for (;;) {
+      w = *Lock ;
+      if ((w & LOCKBIT) == 0) {
+        if (Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+          Self->OnList = 0 ;   // hygiene - allows stronger asserts
+          return ;
+        }
+        continue ;   // Interference -- *Lock changed -- Just retry
+      }
+      assert (w & LOCKBIT, "invariant") ;
+      Self->ListNext = (ParkEvent *) (w & ~LOCKBIT );
+      if (Atomic::cmpxchg_ptr (intptr_t(Self)|LOCKBIT, Lock, w) == w) break ;
+    }
+
+    while (Self->OnList != 0) {
+      Self->park() ;
+    }
+  }
+}
+
+void Thread::muxAcquireW (volatile intptr_t * Lock, ParkEvent * ev) {
+  intptr_t w = Atomic::cmpxchg_ptr (LOCKBIT, Lock, 0) ;
+  if (w == 0) return ;
+  if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+    return ;
+  }
+
+  TEVENT (muxAcquire - Contention) ;
+  ParkEvent * ReleaseAfter = NULL ;
+  if (ev == NULL) {
+    ev = ReleaseAfter = ParkEvent::Allocate (NULL) ;
+  }
+  assert ((intptr_t(ev) & LOCKBIT) == 0, "invariant") ;
+  for (;;) {
+    guarantee (ev->OnList == 0, "invariant") ;
+    int its = (os::is_MP() ? 100 : 0) + 1 ;
+
+    // Optional spin phase: spin-then-park strategy
+    while (--its >= 0) {
+      w = *Lock ;
+      if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+        if (ReleaseAfter != NULL) {
+          ParkEvent::Release (ReleaseAfter) ;
+        }
+        return ;
+      }
+    }
+
+    ev->reset() ;
+    ev->OnList = intptr_t(Lock) ;
+    // The following fence() isn't _strictly necessary as the subsequent
+    // CAS() both serializes execution and ratifies the fetched *Lock value.
+    OrderAccess::fence();
-    for (;;) {
-      // 3: Try to reattach the residual list
-      guarantee (List != NULL, "invariant") ;
-      ParkEvent * Arv = (ParkEvent *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ;
-      if (Arv == NULL) break ;
-
-      // New nodes arrived.  Try to detach the recent arrivals.
-      if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) {
-        continue ;
-      }
-      guarantee (Arv != NULL, "invariant") ;
-      // 4: Merge Arv into List
-      ParkEvent * Tail = List ;
-      while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ;
-      Tail->FreeNext = Arv ;
-    }
-    break ;
-  }
-
-  if (ev != NULL) {
-    guarantee (ev->AssociatedWith == NULL, "invariant") ;
-  } else {
-    // Do this the hard way -- materialize a new ParkEvent.
-    // In rare cases an allocating thread might detach a long list --
-    // installing null into FreeList -- and then stall or be obstructed.
-    // A 2nd thread calling Allocate() would see FreeList == null.
-    // The list held privately by the 1st thread is unavailable to the 2nd thread.
-    // In that case the 2nd thread would have to materialize a new ParkEvent,
-    // even though free ParkEvents existed in the system.  In this case we end up
-    // with more ParkEvents in circulation than we need, but the race is
-    // rare and the outcome is benign.  Ideally, the # of extant ParkEvents
-    // is equal to the maximum # of threads that existed at any one time.
-    // Because of the race mentioned above, segments of the freelist
-    // can be transiently inaccessible.  At worst we may end up with the
-    // # of ParkEvents in circulation slightly above the ideal.
-    // Note that if we didn't have the TSM/immortal constraint, then
-    // when reattaching, above, we could trim the list.
-    ev = new ParkEvent () ;
-    guarantee ((intptr_t(ev) & 0xFF) == 0, "invariant") ;
-  }
-  ev->reset() ;                 // courtesy to caller
-  ev->AssociatedWith = t ;      // Associate ev with t
-  ev->FreeNext = NULL ;
-  return ev ;
-}
-
-void ParkEvent::Release (ParkEvent * ev) {
-  if (ev == NULL) return ;
-  guarantee (ev->FreeNext == NULL, "invariant") ;
-  ev->AssociatedWith = NULL ;
-  for (;;) {
-    // Push ev onto FreeList
-    // The mechanism is "half" lock-free.
-    ParkEvent * List = FreeList ;
-    ev->FreeNext = List ;
-    if (Atomic::cmpxchg_ptr (ev, &FreeList, List) == List) break ;
-  }
-}
-
-// Override operator new and delete so we can ensure that the
-// least significant byte of ParkEvent addresses is 0.
-// Beware that excessive address alignment is undesirable
-// as it can result in D$ index usage imbalance as
-// well as bank access imbalance on Niagara-like platforms,
-// although Niagara's hash function should help.
-
-void * ParkEvent::operator new (size_t sz) {
-  return (void *) ((intptr_t (CHeapObj::operator new (sz + 256)) + 256) & -256) ;
-}
+    for (;;) {
+      w = *Lock ;
+      if ((w & LOCKBIT) == 0) {
+        if (Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+          ev->OnList = 0 ;
+          // We call ::Release while holding the outer lock, thus
+          // artificially lengthening the critical section.
+          // Consider deferring the ::Release() until the subsequent unlock(),
+          // after we've dropped the outer lock.
+          if (ReleaseAfter != NULL) {
+            ParkEvent::Release (ReleaseAfter) ;
+          }
+          return ;
+        }
+        continue ;   // Interference -- *Lock changed -- Just retry
+      }
+      assert (w & LOCKBIT, "invariant") ;
+      ev->ListNext = (ParkEvent *) (w & ~LOCKBIT );
+      if (Atomic::cmpxchg_ptr (intptr_t(ev)|LOCKBIT, Lock, w) == w) break ;
+    }
+
+    while (ev->OnList != 0) {
+      ev->park() ;
+    }
+  }
+}
+
+// Release() must extract a successor from the list and then wake that thread.
+// It can "pop" the front of the list or use a detach-modify-reattach (DMR) scheme
+// similar to that used by ParkEvent::Allocate() and ::Release().  DMR-based
+// Release() would :
+// (A) CAS() or swap() null to *Lock, releasing the lock and detaching the list.
+// (B) Extract a successor from the private list "in-hand"
+// (C) attempt to CAS() the residual back into *Lock over null.
+//     If there were any newly arrived threads, the CAS() would fail.
+//     In that case Release() would detach the RATs, re-merge the list in-hand
+//     with the RATs and repeat as needed.  Alternately, Release() might
+//     detach and extract a successor, but then pass the residual list to the wakee.
+//     The wakee would be responsible for reattaching and remerging before it
+//     competed for the lock.
+//
+// Both "pop" and DMR are immune from ABA corruption -- there can be
+// multiple concurrent pushers, but only one popper or detacher.
+// This implementation pops from the head of the list.  This is unfair,
+// but tends to provide excellent throughput as hot threads remain hot.
+// (We wake recently run threads first).
+
+void Thread::muxRelease (volatile intptr_t * Lock) {
+  for (;;) {
+    const intptr_t w = Atomic::cmpxchg_ptr (0, Lock, LOCKBIT) ;
+    assert (w & LOCKBIT, "invariant") ;
+    if (w == LOCKBIT) return ;
+    ParkEvent * List = (ParkEvent *) (w & ~LOCKBIT) ;
+    assert (List != NULL, "invariant") ;
+    assert (List->OnList == intptr_t(Lock), "invariant") ;
+    ParkEvent * nxt = List->ListNext ;
+
+    // The following CAS() releases the lock and pops the head element.
+    if (Atomic::cmpxchg_ptr (intptr_t(nxt), Lock, w) != w) {
+      continue ;
+    }
+    List->OnList = 0 ;
+    OrderAccess::fence() ;
+    List->unpark () ;
+    return ;
+  }
+}
 
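For reference, a typical call site following the usage rules stated above (leaf lock only, short critical section, no thread-state transitions) would look roughly like this. The lock name and function are made up for illustration and are not part of the changeset.

    // Editorial usage sketch -- hypothetical lock word and critical section.
    static volatile intptr_t SketchLock = 0 ;   // 0 => unlocked mux lock-word

    void update_shared_structure_briefly () {
      Thread::muxAcquire (&SketchLock, "SketchLock") ;
      // ... short, leaf-level critical section: no blocking, no attempts to
      // acquire other locks, no Java thread-state transitions ...
      Thread::muxRelease (&SketchLock) ;
    }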
-void ParkEvent::operator delete (void * a) {
-  // ParkEvents are type-stable and immortal ...
-  ShouldNotReachHere();
-}
-
-
-// 6399321 As a temporary measure we copied & modified the ParkEvent::
-// allocate() and release() code for use by Parkers.  The Parker:: forms
-// will eventually be removed as we consolide and shift over to ParkEvents
-// for both builtin synchronization and JSR166 operations.
-
-volatile int Parker::ListLock = 0 ;
-Parker * volatile Parker::FreeList = NULL ;
-
-Parker * Parker::Allocate (JavaThread * t) {
-  guarantee (t != NULL, "invariant") ;
-  Parker * p ;
-
-  // Start by trying to recycle an existing but unassociated
-  // Parker from the global free list.
-  for (;;) {
-    p = FreeList ;
-    if (p == NULL) break ;
-    // 1: Detach
-    // Tantamount to p = Swap (&FreeList, NULL)
-    if (Atomic::cmpxchg_ptr (NULL, &FreeList, p) != p) {
-      continue ;
-    }
-
-    // We've detached the list.  The list in-hand is now
-    // local to this thread.  This thread can operate on the
-    // list without risk of interference from other threads.
-    // 2: Extract -- pop the 1st element from the list.
-    Parker * List = p->FreeNext ;
-    if (List == NULL) break ;
-    for (;;) {
-      // 3: Try to reattach the residual list
-      guarantee (List != NULL, "invariant") ;
-      Parker * Arv = (Parker *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ;
-      if (Arv == NULL) break ;
-
-      // New nodes arrived.  Try to detach the recent arrivals.
-      if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) {
-        continue ;
-      }
-      guarantee (Arv != NULL, "invariant") ;
-      // 4: Merge Arv into List
-      Parker * Tail = List ;
-      while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ;
-      Tail->FreeNext = Arv ;
-    }
-    break ;
-  }
-
-  if (p != NULL) {
-    guarantee (p->AssociatedWith == NULL, "invariant") ;
-  } else {
-    // Do this the hard way -- materialize a new Parker..
-    // In rare cases an allocating thread might detach
-    // a long list -- installing null into FreeList --and
-    // then stall.  Another thread calling Allocate() would see
-    // FreeList == null and then invoke the ctor.  In this case we
-    // end up with more Parkers in circulation than we need, but
-    // the race is rare and the outcome is benign.
-    // Ideally, the # of extant Parkers is equal to the
-    // maximum # of threads that existed at any one time.
-    // Because of the race mentioned above, segments of the
-    // freelist can be transiently inaccessible.  At worst
-    // we may end up with the # of Parkers in circulation
-    // slightly above the ideal.
-    p = new Parker() ;
-  }
-  p->AssociatedWith = t ;      // Associate p with t
-  p->FreeNext = NULL ;
-  return p ;
-}
-
-
-void Parker::Release (Parker * p) {
-  if (p == NULL) return ;
-  guarantee (p->AssociatedWith != NULL, "invariant") ;
-  guarantee (p->FreeNext == NULL, "invariant") ;
-  p->AssociatedWith = NULL ;
-  for (;;) {
-    // Push p onto FreeList
-    Parker * List = FreeList ;
-    p->FreeNext = List ;
-    if (Atomic::cmpxchg_ptr (p, &FreeList, List) == List) break ;
-  }
-}
 
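The Allocate()/Release() bodies removed above are all instances of the CAS-based detach-modify-reattach idiom described in the old comments: Release() pushes one node, while Allocate() privatizes the whole list, takes the head, and re-merges the residue with any nodes that arrived in the meantime. A condensed, self-contained sketch of that idiom follows; the Node and gFreeListHead names are hypothetical and this is not code from the changeset.

    // Editorial sketch of detach-modify-reattach over a lock-free free list.
    struct Node { Node * FreeNext ; } ;
    static Node * volatile gFreeListHead = NULL ;

    static void dmr_push (Node * n) {           // mirrors ParkEvent/Parker ::Release()
      for (;;) {
        Node * head = gFreeListHead ;
        n->FreeNext = head ;
        if (Atomic::cmpxchg_ptr (n, &gFreeListHead, head) == head) return ;
      }
    }

    static Node * dmr_pop () {                  // mirrors ParkEvent/Parker ::Allocate()
      for (;;) {
        Node * head = gFreeListHead ;
        if (head == NULL) return NULL ;
        // 1: Detach -- privatize the entire list so only this thread mutates it.
        if (Atomic::cmpxchg_ptr (NULL, &gFreeListHead, head) != head) continue ;
        // 2: Extract -- pop the first element; the residue is reattached below.
        Node * rest = head->FreeNext ;
        while (rest != NULL) {
          // 3: Reattach the residue; if new nodes arrived, detach and merge them.
          Node * arrivals = (Node *) Atomic::cmpxchg_ptr (rest, &gFreeListHead, NULL) ;
          if (arrivals == NULL) break ;
          if (Atomic::cmpxchg_ptr (NULL, &gFreeListHead, arrivals) != arrivals) continue ;
          Node * tail = rest ;
          while (tail->FreeNext != NULL) tail = tail->FreeNext ;
          tail->FreeNext = arrivals ;           // 4: Merge arrivals into the residue
        }
        return head ;                           // only one detacher => no ABA hazard
      }
    }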
 void Threads::verify() {
   ALL_JAVA_THREADS(p) {
     p->verify();
   }