diff src/share/vm/runtime/thread.cpp @ 1930:2d26b0046e0d

Merge.
author Thomas Wuerthinger <wuerthinger@ssw.jku.at>
date Tue, 30 Nov 2010 14:53:30 +0100
parents 7cf1952ec5fb 5caa30ea147b
children 06f017f7daa7
--- a/src/share/vm/runtime/thread.cpp	Mon Nov 29 18:32:30 2010 +0100
+++ b/src/share/vm/runtime/thread.cpp	Tue Nov 30 14:53:30 2010 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2010 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -16,9 +16,9 @@
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  *
  */
 
@@ -139,6 +139,8 @@
   omFreeList = NULL ;
   omFreeCount = 0 ;
   omFreeProvision = 32 ;
+  omInUseList = NULL ;
+  omInUseCount = 0 ;
 
   _SR_lock = new Monitor(Mutex::suspend_resume, "SR_lock", true);
   _suspend_flags = 0;
@@ -805,7 +807,7 @@
 // should be revisited, and they should be removed if possible.
 
 bool Thread::is_lock_owned(address adr) const {
-  return (_stack_base >= adr && adr >= (_stack_base - _stack_size));
+  return on_local_stack(adr);
 }
 
 bool Thread::set_as_starting_thread() {
@@ -1018,7 +1020,7 @@
 // timer interrupts exists on the platform.
 
 WatcherThread* WatcherThread::_watcher_thread   = NULL;
-bool           WatcherThread::_should_terminate = false;
+volatile bool  WatcherThread::_should_terminate = false;
 
 WatcherThread::WatcherThread() : Thread() {
   assert(watcher_thread() == NULL, "we can only allocate one WatcherThread");
@@ -1050,8 +1052,26 @@
 
     // Calculate how long it'll be until the next PeriodicTask work
     // should be done, and sleep that amount of time.
-    const size_t time_to_wait = PeriodicTask::time_to_wait();
-    os::sleep(this, time_to_wait, false);
+    size_t time_to_wait = PeriodicTask::time_to_wait();
+
+    // We expect this to time out; we only ever get unparked when
+    // we should terminate
+    {
+      OSThreadWaitState osts(this->osthread(), false /* not Object.wait() */);
+
+      jlong prev_time = os::javaTimeNanos();
+      for (;;) {
+        int res= _SleepEvent->park(time_to_wait);
+        if (res == OS_TIMEOUT || _should_terminate)
+          break;
+        // spurious wakeup of some kind
+        jlong now = os::javaTimeNanos();
+        time_to_wait -= (now - prev_time) / 1000000;
+        if (time_to_wait <= 0)
+          break;
+        prev_time = now;
+      }
+    }
 
     if (is_error_reported()) {
       // A fatal error has happened, the error handler(VMError::report_and_die)
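
For illustration, a minimal standalone sketch of the wait pattern introduced above — a timed wait that absorbs spurious wakeups and only stops early on termination — using standard C++ primitives instead of ParkEvent::park() and os::javaTimeNanos(); all names below are illustrative, not HotSpot APIs:

#include <chrono>
#include <condition_variable>
#include <mutex>

// Sketch only: wait up to total_ms milliseconds, re-arming the timed wait
// after spurious wakeups, and return early once should_stop becomes true.
bool wait_for_work(std::mutex& m, std::condition_variable& cv,
                   bool& should_stop, long total_ms) {
  std::unique_lock<std::mutex> lk(m);
  auto deadline = std::chrono::steady_clock::now() +
                  std::chrono::milliseconds(total_ms);
  while (!should_stop) {
    if (cv.wait_until(lk, deadline) == std::cv_status::timeout) {
      break;                    // expected case: the full interval elapsed
    }
    // Woken early without should_stop set: treat it as spurious and keep
    // waiting.  wait_until() tracks the remaining time for us, whereas the
    // HotSpot loop above recomputes time_to_wait by hand.
  }
  return should_stop;           // true means "terminate", false means "do periodic work"
}
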
@@ -1113,6 +1133,12 @@
   // it is ok to take late safepoints here, if needed
   MutexLocker mu(Terminator_lock);
   _should_terminate = true;
+  OrderAccess::fence();  // ensure WatcherThread sees update in main loop
+
+  Thread* watcher = watcher_thread();
+  if (watcher != NULL)
+    watcher->_SleepEvent->unpark();
+
   while(watcher_thread() != NULL) {
     // This wait should make safepoint checks, wait without a timeout,
     // and wait as a suspend-equivalent condition.
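
For illustration, the same stop handshake shape as WatcherThread::stop() above — publish the terminate flag, wake the sleeping thread, then wait until it has really exited — as a minimal standalone sketch with standard primitives; the names are illustrative, not the actual WatcherThread/Terminator_lock code:

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <mutex>

struct Watcher {
  std::mutex m;
  std::condition_variable cv;
  std::atomic<bool> should_terminate{false};
  bool running = true;

  void run() {                                  // body of the watcher thread
    std::unique_lock<std::mutex> lk(m);
    while (!should_terminate.load()) {
      cv.wait_for(lk, std::chrono::milliseconds(50));  // periodic work would go here
    }
    running = false;                            // analogous to clearing _watcher_thread
    cv.notify_all();
  }

  void stop() {                                 // called by the terminating thread
    should_terminate.store(true);               // like _should_terminate = true + fence
    cv.notify_all();                            // like _SleepEvent->unpark()
    std::unique_lock<std::mutex> lk(m);
    cv.wait(lk, [this] { return !running; });   // like the Terminator_lock wait loop
  }
};
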
@@ -1157,6 +1183,7 @@
   set_vframe_array_last(NULL);
   set_deferred_locals(NULL);
   set_deopt_mark(NULL);
+  set_deopt_nmethod(NULL);
   clear_must_deopt_id();
   set_monitor_chunks(NULL);
   set_next(NULL);
@@ -1172,6 +1199,7 @@
   _exception_pc  = 0;
   _exception_handler_pc = 0;
   _exception_stack_size = 0;
+  _is_method_handle_return = 0;
   _jvmti_thread_state= NULL;
   _should_post_on_exceptions_flag = JNI_FALSE;
   _jvmti_get_loaded_classes_closure = NULL;
@@ -1362,6 +1390,8 @@
 
   this->create_stack_guard_pages();
 
+  this->cache_global_variables();
+
   // Thread is now sufficient initialized to be handled by the safepoint code as being
   // in the VM. Change thread state from _thread_new to _thread_in_vm
   ThreadStateTransition::transition_and_fence(this, _thread_new, _thread_in_vm);
@@ -1616,7 +1646,29 @@
   satb_mark_queue().flush();
   dirty_card_queue().flush();
 }
-#endif
+
+void JavaThread::initialize_queues() {
+  assert(!SafepointSynchronize::is_at_safepoint(),
+         "we should not be at a safepoint");
+
+  ObjPtrQueue& satb_queue = satb_mark_queue();
+  SATBMarkQueueSet& satb_queue_set = satb_mark_queue_set();
+  // The SATB queue should have been constructed with its active
+  // field set to false.
+  assert(!satb_queue.is_active(), "SATB queue should not be active");
+  assert(satb_queue.is_empty(), "SATB queue should be empty");
+  // If we are creating the thread during a marking cycle, we should
+  // set the active field of the SATB queue to true.
+  if (satb_queue_set.is_active()) {
+    satb_queue.set_active(true);
+  }
+
+  DirtyCardQueue& dirty_queue = dirty_card_queue();
+  // The dirty card queue should have been constructed with its
+  // active field set to true.
+  assert(dirty_queue.is_active(), "dirty card queue should be active");
+}
+#endif // !SERIALGC
 
 void JavaThread::cleanup_failed_attach_current_thread() {
   if (get_thread_profiler() != NULL) {
@@ -2084,8 +2136,7 @@
     }
     if (f.id() == thread->must_deopt_id()) {
       thread->clear_must_deopt_id();
-      // Since we know we're safe to deopt the current state is a safe state
-      f.deoptimize(thread, true);
+      f.deoptimize(thread);
     } else {
       fatal("missed deoptimization!");
     }
@@ -2700,7 +2751,7 @@
   if (in_bytes(size_in_bytes) != 0) {
     _popframe_preserved_args = NEW_C_HEAP_ARRAY(char, in_bytes(size_in_bytes));
     _popframe_preserved_args_size = in_bytes(size_in_bytes);
-    Copy::conjoint_bytes(start, _popframe_preserved_args, _popframe_preserved_args_size);
+    Copy::conjoint_jbytes(start, _popframe_preserved_args, _popframe_preserved_args_size);
   }
 }
 
@@ -2800,6 +2851,7 @@
   _queue = queue;
   _counters = counters;
   _is_compiling = false;
+  _buffer_blob = NULL;
 
 #ifndef PRODUCT
   _ideal_graph_printer = NULL;
@@ -2873,6 +2925,9 @@
   // So that JDK version can be used as a discrimintor when parsing arguments
   JDK_Version_init();
 
+  // Update/Initialize System properties after JDK version number is known
+  Arguments::init_version_specific_system_properties();
+
   // Parse arguments
   jint parse_result = Arguments::parse(args);
   if (parse_result != JNI_OK) return parse_result;
@@ -2944,8 +2999,8 @@
   // crash Linux VM, see notes in os_linux.cpp.
   main_thread->create_stack_guard_pages();
 
-  // Initialize Java-Leve synchronization subsystem
-  ObjectSynchronizer::Initialize() ;
+  // Initialize Java-Level synchronization subsystem
+  ObjectMonitor::Initialize() ;
 
   // Initialize global modules
   jint status = init_globals();
@@ -2955,6 +3010,9 @@
     return status;
   }
 
+  // Should be done after the heap is fully created
+  main_thread->cache_global_variables();
+
   HandleMark hm;
 
   { MutexLocker mu(Threads_lock);
@@ -3230,6 +3288,9 @@
     WatcherThread::start();
   }
 
+  // Give os specific code one last chance to start
+  os::init_3();
+
   create_vm_timer.end();
   return JNI_OK;
 }
@@ -3249,12 +3310,18 @@
     char buffer[JVM_MAXPATHLEN];
     char ebuf[1024];
     const char *name = agent->name();
+    const char *msg = "Could not find agent library ";
 
     if (agent->is_absolute_path()) {
       library = hpi::dll_load(name, ebuf, sizeof ebuf);
       if (library == NULL) {
+        const char *sub_msg = " in absolute path, with error: ";
+        size_t len = strlen(msg) + strlen(name) + strlen(sub_msg) + strlen(ebuf) + 1;
+        char *buf = NEW_C_HEAP_ARRAY(char, len);
+        jio_snprintf(buf, len, "%s%s%s%s", msg, name, sub_msg, ebuf);
         // If we can't find the agent, exit.
-        vm_exit_during_initialization("Could not find agent library in absolute path", name);
+        vm_exit_during_initialization(buf, NULL);
+        FREE_C_HEAP_ARRAY(char, buf);
       }
     } else {
       // Try to load the agent from the standard dll directory
@@ -3267,17 +3334,17 @@
         char *home  = Arguments::get_java_home();
         const char *fmt   = "%s/bin/java %s -Dkernel.background.download=false"
                       " sun.jkernel.DownloadManager -download client_jvm";
-        int length = strlen(props) + strlen(home) + strlen(fmt) + 1;
-        char *cmd = AllocateHeap(length);
+        size_t length = strlen(props) + strlen(home) + strlen(fmt) + 1;
+        char *cmd = NEW_C_HEAP_ARRAY(char, length);
         jio_snprintf(cmd, length, fmt, home, props);
         int status = os::fork_and_exec(cmd);
         FreeHeap(props);
-        FreeHeap(cmd);
         if (status == -1) {
           warning(cmd);
           vm_exit_during_initialization("fork_and_exec failed: %s",
                                          strerror(errno));
         }
+        FREE_C_HEAP_ARRAY(char, cmd);
         // when this comes back the instrument.dll should be where it belongs.
         library = hpi::dll_load(buffer, ebuf, sizeof ebuf);
       }
@@ -3287,8 +3354,13 @@
         hpi::dll_build_name(buffer, sizeof(buffer), ns, name);
         library = hpi::dll_load(buffer, ebuf, sizeof ebuf);
         if (library == NULL) {
+          const char *sub_msg = " on the library path, with error: ";
+          size_t len = strlen(msg) + strlen(name) + strlen(sub_msg) + strlen(ebuf) + 1;
+          char *buf = NEW_C_HEAP_ARRAY(char, len);
+          jio_snprintf(buf, len, "%s%s%s%s", msg, name, sub_msg, ebuf);
           // If we can't find the agent, exit.
-          vm_exit_during_initialization("Could not find agent library on the library path or in the local directory", name);
+          vm_exit_during_initialization(buf, NULL);
+          FREE_C_HEAP_ARRAY(char, buf);
         }
       }
     }
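
For illustration, the buffer-sizing pattern used in both error paths above — sum the part lengths, add one for the terminator, then format — as a minimal standalone sketch with plain malloc/snprintf standing in for NEW_C_HEAP_ARRAY and jio_snprintf; the helper name is hypothetical:

#include <cstdio>
#include <cstdlib>
#include <cstring>

// Sketch only: the caller frees the returned buffer.  (The HotSpot code above
// instead passes it to vm_exit_during_initialization(), which never returns.)
char* build_agent_error(const char* msg, const char* name,
                        const char* sub_msg, const char* ebuf) {
  size_t len = std::strlen(msg) + std::strlen(name) +
               std::strlen(sub_msg) + std::strlen(ebuf) + 1;
  char* buf = static_cast<char*>(std::malloc(len));
  if (buf != NULL) {
    std::snprintf(buf, len, "%s%s%s%s", msg, name, sub_msg, ebuf);
  }
  return buf;
}
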
@@ -3584,6 +3656,10 @@
 void Threads::add(JavaThread* p, bool force_daemon) {
   // The threads lock must be owned at this point
   assert_locked_or_safepoint(Threads_lock);
+
+  // See the comment for this method in thread.hpp for its purpose and
+  // why it is called here.
+  p->initialize_queues();
   p->set_next(_thread_list);
   _thread_list = p;
   _number_of_threads++;
@@ -3895,215 +3971,272 @@
   }
 }
 
-
-// Lifecycle management for TSM ParkEvents.
-// ParkEvents are type-stable (TSM).
-// In our particular implementation they happen to be immortal.
+// Internal SpinLock and Mutex
+// Based on ParkEvent
+
+// Ad-hoc mutual exclusion primitives: SpinLock and Mux
 //
-// We manage concurrency on the FreeList with a CAS-based
-// detach-modify-reattach idiom that avoids the ABA problems
-// that would otherwise be present in a simple CAS-based
-// push-pop implementation.   (push-one and pop-all)
+// We employ SpinLocks _only for low-contention, fixed-length
+// short-duration critical sections where we're concerned
+// about native mutex_t or HotSpot Mutex:: latency.
+// The mux construct provides a spin-then-block mutual exclusion
+// mechanism.
+//
+// Testing has shown that contention on the ListLock guarding gFreeList
+// is common.  If we implement ListLock as a simple SpinLock it's common
+// for the JVM to devolve to yielding with little progress.  This is true
+// despite the fact that the critical sections protected by ListLock are
+// extremely short.
 //
-// Caveat: Allocate() and Release() may be called from threads
-// other than the thread associated with the Event!
-// If we need to call Allocate() when running as the thread in
-// question then look for the PD calls to initialize native TLS.
-// Native TLS (Win32/Linux/Solaris) can only be initialized or
-// accessed by the associated thread.
-// See also pd_initialize().
-//
-// Note that we could defer associating a ParkEvent with a thread
-// until the 1st time the thread calls park().  unpark() calls to
-// an unprovisioned thread would be ignored.  The first park() call
-// for a thread would allocate and associate a ParkEvent and return
-// immediately.
-
-volatile int ParkEvent::ListLock = 0 ;
-ParkEvent * volatile ParkEvent::FreeList = NULL ;
-
-ParkEvent * ParkEvent::Allocate (Thread * t) {
-  // In rare cases -- JVM_RawMonitor* operations -- we can find t == null.
-  ParkEvent * ev ;
-
-  // Start by trying to recycle an existing but unassociated
-  // ParkEvent from the global free list.
+// TODO-FIXME: ListLock should be of type SpinLock.
+// We should make this a 1st-class type, integrated into the lock
+// hierarchy as leaf-locks.  Critically, the SpinLock structure
+// should have sufficient padding to avoid false-sharing and excessive
+// cache-coherency traffic.
+
+
+typedef volatile int SpinLockT ;
+
+void Thread::SpinAcquire (volatile int * adr, const char * LockName) {
+  if (Atomic::cmpxchg (1, adr, 0) == 0) {
+     return ;   // normal fast-path return
+  }
+
+  // Slow-path : We've encountered contention -- Spin/Yield/Block strategy.
+  TEVENT (SpinAcquire - ctx) ;
+  int ctr = 0 ;
+  int Yields = 0 ;
   for (;;) {
-    ev = FreeList ;
-    if (ev == NULL) break ;
-    // 1: Detach - sequester or privatize the list
-    // Tantamount to ev = Swap (&FreeList, NULL)
-    if (Atomic::cmpxchg_ptr (NULL, &FreeList, ev) != ev) {
-       continue ;
-    }
-
-    // We've detached the list.  The list in-hand is now
-    // local to this thread.   This thread can operate on the
-    // list without risk of interference from other threads.
-    // 2: Extract -- pop the 1st element from the list.
-    ParkEvent * List = ev->FreeNext ;
-    if (List == NULL) break ;
-    for (;;) {
-        // 3: Try to reattach the residual list
-        guarantee (List != NULL, "invariant") ;
-        ParkEvent * Arv =  (ParkEvent *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ;
-        if (Arv == NULL) break ;
-
-        // New nodes arrived.  Try to detach the recent arrivals.
-        if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) {
-            continue ;
+     while (*adr != 0) {
+        ++ctr ;
+        if ((ctr & 0xFFF) == 0 || !os::is_MP()) {
+           if (Yields > 5) {
+             // Consider using a simple NakedSleep() instead.
+             // Then SpinAcquire could be called by non-JVM threads
+             Thread::current()->_ParkEvent->park(1) ;
+           } else {
+             os::NakedYield() ;
+             ++Yields ;
+           }
+        } else {
+           SpinPause() ;
         }
-        guarantee (Arv != NULL, "invariant") ;
-        // 4: Merge Arv into List
-        ParkEvent * Tail = List ;
-        while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ;
-        Tail->FreeNext = Arv ;
-    }
-    break ;
-  }
-
-  if (ev != NULL) {
-    guarantee (ev->AssociatedWith == NULL, "invariant") ;
-  } else {
-    // Do this the hard way -- materialize a new ParkEvent.
-    // In rare cases an allocating thread might detach a long list --
-    // installing null into FreeList -- and then stall or be obstructed.
-    // A 2nd thread calling Allocate() would see FreeList == null.
-    // The list held privately by the 1st thread is unavailable to the 2nd thread.
-    // In that case the 2nd thread would have to materialize a new ParkEvent,
-    // even though free ParkEvents existed in the system.  In this case we end up
-    // with more ParkEvents in circulation than we need, but the race is
-    // rare and the outcome is benign.  Ideally, the # of extant ParkEvents
-    // is equal to the maximum # of threads that existed at any one time.
-    // Because of the race mentioned above, segments of the freelist
-    // can be transiently inaccessible.  At worst we may end up with the
-    // # of ParkEvents in circulation slightly above the ideal.
-    // Note that if we didn't have the TSM/immortal constraint, then
-    // when reattaching, above, we could trim the list.
-    ev = new ParkEvent () ;
-    guarantee ((intptr_t(ev) & 0xFF) == 0, "invariant") ;
-  }
-  ev->reset() ;                     // courtesy to caller
-  ev->AssociatedWith = t ;          // Associate ev with t
-  ev->FreeNext       = NULL ;
-  return ev ;
-}
-
-void ParkEvent::Release (ParkEvent * ev) {
-  if (ev == NULL) return ;
-  guarantee (ev->FreeNext == NULL      , "invariant") ;
-  ev->AssociatedWith = NULL ;
-  for (;;) {
-    // Push ev onto FreeList
-    // The mechanism is "half" lock-free.
-    ParkEvent * List = FreeList ;
-    ev->FreeNext = List ;
-    if (Atomic::cmpxchg_ptr (ev, &FreeList, List) == List) break ;
+     }
+     if (Atomic::cmpxchg (1, adr, 0) == 0) return ;
   }
 }
 
-// Override operator new and delete so we can ensure that the
-// least significant byte of ParkEvent addresses is 0.
-// Beware that excessive address alignment is undesirable
-// as it can result in D$ index usage imbalance as
-// well as bank access imbalance on Niagara-like platforms,
-// although Niagara's hash function should help.
-
-void * ParkEvent::operator new (size_t sz) {
-  return (void *) ((intptr_t (CHeapObj::operator new (sz + 256)) + 256) & -256) ;
-}
-
-void ParkEvent::operator delete (void * a) {
-  // ParkEvents are type-stable and immortal ...
-  ShouldNotReachHere();
+void Thread::SpinRelease (volatile int * adr) {
+  assert (*adr != 0, "invariant") ;
+  OrderAccess::fence() ;      // guarantee at least release consistency.
+  // Roach-motel semantics.
+  // It's safe if subsequent LDs and STs float "up" into the critical section,
+  // but prior LDs and STs within the critical section can't be allowed
+  // to reorder or float past the ST that releases the lock.
+  *adr = 0 ;
 }
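
For illustration, a standalone test-and-test-and-set lock with the same spin/yield escalation idea as SpinAcquire/SpinRelease above; it omits the park() fallback, which needs a per-thread event such as HotSpot's _ParkEvent, and all names are illustrative:

#include <atomic>
#include <thread>

class SpinLock {
  std::atomic<int> _lock{0};
 public:
  void acquire() {
    if (_lock.exchange(1, std::memory_order_acquire) == 0) return;    // fast path
    for (;;) {
      int ctr = 0;
      // Test-and-test-and-set: spin on a plain load, yield occasionally.
      while (_lock.load(std::memory_order_relaxed) != 0) {
        if ((++ctr & 0xFFF) == 0) std::this_thread::yield();
      }
      if (_lock.exchange(1, std::memory_order_acquire) == 0) return;  // looked free; try to take it
    }
  }
  void release() {
    _lock.store(0, std::memory_order_release);  // release-store, like SpinRelease's fence + *adr = 0
  }
};
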
 
-
-// 6399321 As a temporary measure we copied & modified the ParkEvent::
-// allocate() and release() code for use by Parkers.  The Parker:: forms
-// will eventually be removed as we consolide and shift over to ParkEvents
-// for both builtin synchronization and JSR166 operations.
-
-volatile int Parker::ListLock = 0 ;
-Parker * volatile Parker::FreeList = NULL ;
-
-Parker * Parker::Allocate (JavaThread * t) {
-  guarantee (t != NULL, "invariant") ;
-  Parker * p ;
-
-  // Start by trying to recycle an existing but unassociated
-  // Parker from the global free list.
+// muxAcquire and muxRelease:
+//
+// *  muxAcquire and muxRelease support a single-word lock-word construct.
+//    The LSB of the word is set IFF the lock is held.
+//    The remainder of the word points to the head of a singly-linked list
+//    of threads blocked on the lock.
+//
+// *  The current implementation of muxAcquire-muxRelease uses its own
+//    dedicated Thread._MuxEvent instance.  If we're interested in
+//    minimizing the peak number of extant ParkEvent instances then
+//    we could eliminate _MuxEvent and "borrow" _ParkEvent as long
+//    as certain invariants were satisfied.  Specifically, care would need
+//    to be taken with regards to consuming unpark() "permits".
+//    A safe rule of thumb is that a thread would never call muxAcquire()
+//    if it's enqueued (cxq, EntryList, WaitList, etc) and will subsequently
+//    park().  Otherwise the _ParkEvent park() operation in muxAcquire() could
+//    consume an unpark() permit intended for monitorenter, for instance.
+//    One way around this would be to widen the restricted-range semaphore
+//    implemented in park().  Another alternative would be to provide
+//    multiple instances of the PlatformEvent() for each thread.  One
+//    instance would be dedicated to muxAcquire-muxRelease, for instance.
+//
+// *  Usage:
+//    -- Only as leaf locks
+//    -- for short-term locking only as muxAcquire does not perform
+//       thread state transitions.
+//
+// Alternatives:
+// *  We could implement muxAcquire and muxRelease with MCS or CLH locks
+//    but with parking or spin-then-park instead of pure spinning.
+// *  Use Taura-Oyama-Yonenzawa locks.
+// *  It's possible to construct a 1-0 lock if we encode the lockword as
+//    (List,LockByte).  Acquire will CAS the full lockword while Release
+//    will STB 0 into the LockByte.  The 1-0 scheme admits stranding, so
+//    acquiring threads use timers (ParkTimed) to detect and recover from
+//    the stranding window.  Thread/Node structures must be aligned on 256-byte
+//    boundaries by using placement-new.
+// *  Augment MCS with advisory back-link fields maintained with CAS().
+//    Pictorially:  LockWord -> T1 <-> T2 <-> T3 <-> ... <-> Tn <-> Owner.
+//    The validity of the backlinks must be ratified before we trust the value.
+//    If the backlinks are invalid the exiting thread must back-track through
+//    the forward links, which are always trustworthy.
+// *  Add a successor indication.  The LockWord is currently encoded as
+//    (List, LOCKBIT:1).  We could also add a SUCCBIT or an explicit _succ variable
+//    to provide the usual futile-wakeup optimization.
+//    See RTStt for details.
+// *  Consider schedctl.sc_nopreempt to cover the critical section.
+//
+
+
+typedef volatile intptr_t MutexT ;      // Mux Lock-word
+enum MuxBits { LOCKBIT = 1 } ;
+
+void Thread::muxAcquire (volatile intptr_t * Lock, const char * LockName) {
+  intptr_t w = Atomic::cmpxchg_ptr (LOCKBIT, Lock, 0) ;
+  if (w == 0) return ;
+  if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+     return ;
+  }
+
+  TEVENT (muxAcquire - Contention) ;
+  ParkEvent * const Self = Thread::current()->_MuxEvent ;
+  assert ((intptr_t(Self) & LOCKBIT) == 0, "invariant") ;
   for (;;) {
-    p = FreeList ;
-    if (p  == NULL) break ;
-    // 1: Detach
-    // Tantamount to p = Swap (&FreeList, NULL)
-    if (Atomic::cmpxchg_ptr (NULL, &FreeList, p) != p) {
-       continue ;
-    }
-
-    // We've detached the list.  The list in-hand is now
-    // local to this thread.   This thread can operate on the
-    // list without risk of interference from other threads.
-    // 2: Extract -- pop the 1st element from the list.
-    Parker * List = p->FreeNext ;
-    if (List == NULL) break ;
-    for (;;) {
-        // 3: Try to reattach the residual list
-        guarantee (List != NULL, "invariant") ;
-        Parker * Arv =  (Parker *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ;
-        if (Arv == NULL) break ;
-
-        // New nodes arrived.  Try to detach the recent arrivals.
-        if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) {
-            continue ;
+     int its = (os::is_MP() ? 100 : 0) + 1 ;
+
+     // Optional spin phase: spin-then-park strategy
+     while (--its >= 0) {
+       w = *Lock ;
+       if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+          return ;
+       }
+     }
+
+     Self->reset() ;
+     Self->OnList = intptr_t(Lock) ;
+     // The following fence() isn't _strictly necessary as the subsequent
+     // CAS() both serializes execution and ratifies the fetched *Lock value.
+     OrderAccess::fence();
+     for (;;) {
+        w = *Lock ;
+        if ((w & LOCKBIT) == 0) {
+            if (Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+                Self->OnList = 0 ;   // hygiene - allows stronger asserts
+                return ;
+            }
+            continue ;      // Interference -- *Lock changed -- Just retry
         }
-        guarantee (Arv != NULL, "invariant") ;
-        // 4: Merge Arv into List
-        Parker * Tail = List ;
-        while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ;
-        Tail->FreeNext = Arv ;
-    }
-    break ;
-  }
-
-  if (p != NULL) {
-    guarantee (p->AssociatedWith == NULL, "invariant") ;
-  } else {
-    // Do this the hard way -- materialize a new Parker..
-    // In rare cases an allocating thread might detach
-    // a long list -- installing null into FreeList --and
-    // then stall.  Another thread calling Allocate() would see
-    // FreeList == null and then invoke the ctor.  In this case we
-    // end up with more Parkers in circulation than we need, but
-    // the race is rare and the outcome is benign.
-    // Ideally, the # of extant Parkers is equal to the
-    // maximum # of threads that existed at any one time.
-    // Because of the race mentioned above, segments of the
-    // freelist can be transiently inaccessible.  At worst
-    // we may end up with the # of Parkers in circulation
-    // slightly above the ideal.
-    p = new Parker() ;
-  }
-  p->AssociatedWith = t ;          // Associate p with t
-  p->FreeNext       = NULL ;
-  return p ;
-}
-
-
-void Parker::Release (Parker * p) {
-  if (p == NULL) return ;
-  guarantee (p->AssociatedWith != NULL, "invariant") ;
-  guarantee (p->FreeNext == NULL      , "invariant") ;
-  p->AssociatedWith = NULL ;
-  for (;;) {
-    // Push p onto FreeList
-    Parker * List = FreeList ;
-    p->FreeNext = List ;
-    if (Atomic::cmpxchg_ptr (p, &FreeList, List) == List) break ;
+        assert (w & LOCKBIT, "invariant") ;
+        Self->ListNext = (ParkEvent *) (w & ~LOCKBIT );
+        if (Atomic::cmpxchg_ptr (intptr_t(Self)|LOCKBIT, Lock, w) == w) break ;
+     }
+
+     while (Self->OnList != 0) {
+        Self->park() ;
+     }
   }
 }
 
+void Thread::muxAcquireW (volatile intptr_t * Lock, ParkEvent * ev) {
+  intptr_t w = Atomic::cmpxchg_ptr (LOCKBIT, Lock, 0) ;
+  if (w == 0) return ;
+  if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+    return ;
+  }
+
+  TEVENT (muxAcquire - Contention) ;
+  ParkEvent * ReleaseAfter = NULL ;
+  if (ev == NULL) {
+    ev = ReleaseAfter = ParkEvent::Allocate (NULL) ;
+  }
+  assert ((intptr_t(ev) & LOCKBIT) == 0, "invariant") ;
+  for (;;) {
+    guarantee (ev->OnList == 0, "invariant") ;
+    int its = (os::is_MP() ? 100 : 0) + 1 ;
+
+    // Optional spin phase: spin-then-park strategy
+    while (--its >= 0) {
+      w = *Lock ;
+      if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+        if (ReleaseAfter != NULL) {
+          ParkEvent::Release (ReleaseAfter) ;
+        }
+        return ;
+      }
+    }
+
+    ev->reset() ;
+    ev->OnList = intptr_t(Lock) ;
+    // The following fence() isn't _strictly necessary as the subsequent
+    // CAS() both serializes execution and ratifies the fetched *Lock value.
+    OrderAccess::fence();
+    for (;;) {
+      w = *Lock ;
+      if ((w & LOCKBIT) == 0) {
+        if (Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+          ev->OnList = 0 ;
+          // We call ::Release while holding the outer lock, thus
+          // artificially lengthening the critical section.
+          // Consider deferring the ::Release() until the subsequent unlock(),
+          // after we've dropped the outer lock.
+          if (ReleaseAfter != NULL) {
+            ParkEvent::Release (ReleaseAfter) ;
+          }
+          return ;
+        }
+        continue ;      // Interference -- *Lock changed -- Just retry
+      }
+      assert (w & LOCKBIT, "invariant") ;
+      ev->ListNext = (ParkEvent *) (w & ~LOCKBIT );
+      if (Atomic::cmpxchg_ptr (intptr_t(ev)|LOCKBIT, Lock, w) == w) break ;
+    }
+
+    while (ev->OnList != 0) {
+      ev->park() ;
+    }
+  }
+}
+
+// Release() must extract a successor from the list and then wake that thread.
+// It can "pop" the front of the list or use a detach-modify-reattach (DMR) scheme
+// similar to that used by ParkEvent::Allocate() and ::Release().  DMR-based
+// Release() would :
+// (A) CAS() or swap() null to *Lock, releasing the lock and detaching the list.
+// (B) Extract a successor from the private list "in-hand"
+// (C) attempt to CAS() the residual back into *Lock over null.
+//     If there were any newly arrived threads, the CAS() would fail.
+//     In that case Release() would detach the RATs, re-merge the list in-hand
+//     with the RATs and repeat as needed.  Alternately, Release() might
+//     detach and extract a successor, but then pass the residual list to the wakee.
+//     The wakee would be responsible for reattaching and remerging before it
+//     competed for the lock.
+//
+// Both "pop" and DMR are immune from ABA corruption -- there can be
+// multiple concurrent pushers, but only one popper or detacher.
+// This implementation pops from the head of the list.  This is unfair,
+// but tends to provide excellent throughput as hot threads remain hot.
+// (We wake recently run threads first).
+
+void Thread::muxRelease (volatile intptr_t * Lock)  {
+  for (;;) {
+    const intptr_t w = Atomic::cmpxchg_ptr (0, Lock, LOCKBIT) ;
+    assert (w & LOCKBIT, "invariant") ;
+    if (w == LOCKBIT) return ;
+    ParkEvent * List = (ParkEvent *) (w & ~LOCKBIT) ;
+    assert (List != NULL, "invariant") ;
+    assert (List->OnList == intptr_t(Lock), "invariant") ;
+    ParkEvent * nxt = List->ListNext ;
+
+    // The following CAS() releases the lock and pops the head element.
+    if (Atomic::cmpxchg_ptr (intptr_t(nxt), Lock, w) != w) {
+      continue ;
+    }
+    List->OnList = 0 ;
+    OrderAccess::fence() ;
+    List->unpark () ;
+    return ;
+  }
+}
+
+
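
For illustration, the lock-word layout that muxAcquire/muxRelease manipulate — the low bit means "held" and the remaining bits hold the address of the first queued waiter (ParkEvents are 256-byte aligned, so the low bits are free) — as a minimal sketch with illustrative names:

#include <cassert>
#include <cstdint>

struct WaiterNode { WaiterNode* next; };   // stands in for ParkEvent / ListNext

const std::intptr_t kLockBit = 1;

std::intptr_t make_word(WaiterNode* head, bool locked) {
  // Alignment guarantees the low bit of the pointer is free for the lock bit.
  assert((reinterpret_cast<std::intptr_t>(head) & kLockBit) == 0);
  return reinterpret_cast<std::intptr_t>(head) | (locked ? kLockBit : 0);
}
bool is_locked(std::intptr_t w)      { return (w & kLockBit) != 0; }
WaiterNode* waiters(std::intptr_t w) { return reinterpret_cast<WaiterNode*>(w & ~kLockBit); }

// muxAcquire CASes 0 -> kLockBit on the fast path; under contention it pushes
// its ParkEvent onto the list and parks.  muxRelease CASes kLockBit -> 0 when
// the list is empty, otherwise pops the head element and unparks it.
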
 void Threads::verify() {
   ALL_JAVA_THREADS(p) {
     p->verify();