changeset 17718:804f89b6ff46 hs25.20-b06

Merge
author amurillo
date Thu, 13 Mar 2014 09:57:31 -0700
parents 0c8d4e3b5c9a (current diff) e35733785856 (diff)
children c3d92e048737
files
diffstat 41 files changed, 1229 insertions(+), 1413 deletions(-) [+]
line wrap: on
line diff
--- a/make/hotspot_version	Wed Mar 12 14:10:31 2014 -0700
+++ b/make/hotspot_version	Thu Mar 13 09:57:31 2014 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=25
 HS_MINOR_VER=20
-HS_BUILD_NUMBER=05
+HS_BUILD_NUMBER=06
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -98,217 +98,6 @@
   return Address::make_array(adr);
 }
 
-int MacroAssembler::biased_locking_enter(Register lock_reg,
-                                         Register obj_reg,
-                                         Register swap_reg,
-                                         Register tmp_reg,
-                                         bool swap_reg_contains_mark,
-                                         Label& done,
-                                         Label* slow_case,
-                                         BiasedLockingCounters* counters) {
-  assert(UseBiasedLocking, "why call this otherwise?");
-  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
-  assert_different_registers(lock_reg, obj_reg, swap_reg);
-
-  if (PrintBiasedLockingStatistics && counters == NULL)
-    counters = BiasedLocking::counters();
-
-  bool need_tmp_reg = false;
-  if (tmp_reg == noreg) {
-    need_tmp_reg = true;
-    tmp_reg = lock_reg;
-  } else {
-    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
-  }
-  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
-  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
-  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
-  Address saved_mark_addr(lock_reg, 0);
-
-  // Biased locking
-  // See whether the lock is currently biased toward our thread and
-  // whether the epoch is still valid
-  // Note that the runtime guarantees sufficient alignment of JavaThread
-  // pointers to allow age to be placed into low bits
-  // First check to see whether biasing is even enabled for this object
-  Label cas_label;
-  int null_check_offset = -1;
-  if (!swap_reg_contains_mark) {
-    null_check_offset = offset();
-    movl(swap_reg, mark_addr);
-  }
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  movl(tmp_reg, swap_reg);
-  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
-  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  jcc(Assembler::notEqual, cas_label);
-  // The bias pattern is present in the object's header. Need to check
-  // whether the bias owner and the epoch are both still current.
-  // Note that because there is no current thread register on x86 we
-  // need to store off the mark word we read out of the object to
-  // avoid reloading it and needing to recheck invariants below. This
-  // store is unfortunate but it makes the overall code shorter and
-  // simpler.
-  movl(saved_mark_addr, swap_reg);
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  get_thread(tmp_reg);
-  xorl(swap_reg, tmp_reg);
-  if (swap_reg_contains_mark) {
-    null_check_offset = offset();
-  }
-  movl(tmp_reg, klass_addr);
-  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
-  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
-  }
-  jcc(Assembler::equal, done);
-
-  Label try_revoke_bias;
-  Label try_rebias;
-
-  // At this point we know that the header has the bias pattern and
-  // that we are not the bias owner in the current epoch. We need to
-  // figure out more details about the state of the header in order to
-  // know what operations can be legally performed on the object's
-  // header.
-
-  // If the low three bits in the xor result aren't clear, that means
-  // the prototype header is no longer biased and we have to revoke
-  // the bias on this object.
-  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
-  jcc(Assembler::notZero, try_revoke_bias);
-
-  // Biasing is still enabled for this data type. See whether the
-  // epoch of the current bias is still valid, meaning that the epoch
-  // bits of the mark word are equal to the epoch bits of the
-  // prototype header. (Note that the prototype header's epoch bits
-  // only change at a safepoint.) If not, attempt to rebias the object
-  // toward the current thread. Note that we must be absolutely sure
-  // that the current epoch is invalid in order to do this because
-  // otherwise the manipulations it performs on the mark word are
-  // illegal.
-  testl(swap_reg, markOopDesc::epoch_mask_in_place);
-  jcc(Assembler::notZero, try_rebias);
-
-  // The epoch of the current bias is still valid but we know nothing
-  // about the owner; it might be set or it might be clear. Try to
-  // acquire the bias of the object using an atomic operation. If this
-  // fails we will go in to the runtime to revoke the object's bias.
-  // Note that we first construct the presumed unbiased header so we
-  // don't accidentally blow away another thread's valid bias.
-  movl(swap_reg, saved_mark_addr);
-  andl(swap_reg,
-       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  get_thread(tmp_reg);
-  orl(tmp_reg, swap_reg);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  // If the biasing toward our thread failed, this means that
-  // another thread succeeded in biasing it toward itself and we
-  // need to revoke that bias. The revocation will occur in the
-  // interpreter runtime in the slow case.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
-  }
-  if (slow_case != NULL) {
-    jcc(Assembler::notZero, *slow_case);
-  }
-  jmp(done);
-
-  bind(try_rebias);
-  // At this point we know the epoch has expired, meaning that the
-  // current "bias owner", if any, is actually invalid. Under these
-  // circumstances _only_, we are allowed to use the current header's
-  // value as the comparison value when doing the cas to acquire the
-  // bias in the current epoch. In other words, we allow transfer of
-  // the bias from one thread to another directly in this situation.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  get_thread(tmp_reg);
-  movl(swap_reg, klass_addr);
-  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
-  movl(swap_reg, saved_mark_addr);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  // If the biasing toward our thread failed, then another thread
-  // succeeded in biasing it toward itself and we need to revoke that
-  // bias. The revocation will occur in the runtime in the slow case.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
-  }
-  if (slow_case != NULL) {
-    jcc(Assembler::notZero, *slow_case);
-  }
-  jmp(done);
-
-  bind(try_revoke_bias);
-  // The prototype mark in the klass doesn't have the bias bit set any
-  // more, indicating that objects of this data type are not supposed
-  // to be biased any more. We are going to try to reset the mark of
-  // this object to the prototype value and fall through to the
-  // CAS-based locking scheme. Note that if our CAS fails, it means
-  // that another thread raced us for the privilege of revoking the
-  // bias of this particular object, so it's okay to continue in the
-  // normal locking code.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  movl(swap_reg, saved_mark_addr);
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  movl(tmp_reg, klass_addr);
-  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  // Fall through to the normal CAS-based lock, because no matter what
-  // the result of the above CAS, some thread must have succeeded in
-  // removing the bias bit from the object's header.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
-  }
-
-  bind(cas_label);
-
-  return null_check_offset;
-}
 void MacroAssembler::call_VM_leaf_base(address entry_point,
                                        int number_of_arguments) {
   call(RuntimeAddress(entry_point));
@@ -726,165 +515,6 @@
   return array;
 }
 
-int MacroAssembler::biased_locking_enter(Register lock_reg,
-                                         Register obj_reg,
-                                         Register swap_reg,
-                                         Register tmp_reg,
-                                         bool swap_reg_contains_mark,
-                                         Label& done,
-                                         Label* slow_case,
-                                         BiasedLockingCounters* counters) {
-  assert(UseBiasedLocking, "why call this otherwise?");
-  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
-  assert(tmp_reg != noreg, "tmp_reg must be supplied");
-  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
-  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
-  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
-  Address saved_mark_addr(lock_reg, 0);
-
-  if (PrintBiasedLockingStatistics && counters == NULL)
-    counters = BiasedLocking::counters();
-
-  // Biased locking
-  // See whether the lock is currently biased toward our thread and
-  // whether the epoch is still valid
-  // Note that the runtime guarantees sufficient alignment of JavaThread
-  // pointers to allow age to be placed into low bits
-  // First check to see whether biasing is even enabled for this object
-  Label cas_label;
-  int null_check_offset = -1;
-  if (!swap_reg_contains_mark) {
-    null_check_offset = offset();
-    movq(swap_reg, mark_addr);
-  }
-  movq(tmp_reg, swap_reg);
-  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
-  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
-  jcc(Assembler::notEqual, cas_label);
-  // The bias pattern is present in the object's header. Need to check
-  // whether the bias owner and the epoch are both still current.
-  load_prototype_header(tmp_reg, obj_reg);
-  orq(tmp_reg, r15_thread);
-  xorq(tmp_reg, swap_reg);
-  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
-  }
-  jcc(Assembler::equal, done);
-
-  Label try_revoke_bias;
-  Label try_rebias;
-
-  // At this point we know that the header has the bias pattern and
-  // that we are not the bias owner in the current epoch. We need to
-  // figure out more details about the state of the header in order to
-  // know what operations can be legally performed on the object's
-  // header.
-
-  // If the low three bits in the xor result aren't clear, that means
-  // the prototype header is no longer biased and we have to revoke
-  // the bias on this object.
-  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
-  jcc(Assembler::notZero, try_revoke_bias);
-
-  // Biasing is still enabled for this data type. See whether the
-  // epoch of the current bias is still valid, meaning that the epoch
-  // bits of the mark word are equal to the epoch bits of the
-  // prototype header. (Note that the prototype header's epoch bits
-  // only change at a safepoint.) If not, attempt to rebias the object
-  // toward the current thread. Note that we must be absolutely sure
-  // that the current epoch is invalid in order to do this because
-  // otherwise the manipulations it performs on the mark word are
-  // illegal.
-  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
-  jcc(Assembler::notZero, try_rebias);
-
-  // The epoch of the current bias is still valid but we know nothing
-  // about the owner; it might be set or it might be clear. Try to
-  // acquire the bias of the object using an atomic operation. If this
-  // fails we will go in to the runtime to revoke the object's bias.
-  // Note that we first construct the presumed unbiased header so we
-  // don't accidentally blow away another thread's valid bias.
-  andq(swap_reg,
-       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
-  movq(tmp_reg, swap_reg);
-  orq(tmp_reg, r15_thread);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgq(tmp_reg, Address(obj_reg, 0));
-  // If the biasing toward our thread failed, this means that
-  // another thread succeeded in biasing it toward itself and we
-  // need to revoke that bias. The revocation will occur in the
-  // interpreter runtime in the slow case.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
-  }
-  if (slow_case != NULL) {
-    jcc(Assembler::notZero, *slow_case);
-  }
-  jmp(done);
-
-  bind(try_rebias);
-  // At this point we know the epoch has expired, meaning that the
-  // current "bias owner", if any, is actually invalid. Under these
-  // circumstances _only_, we are allowed to use the current header's
-  // value as the comparison value when doing the cas to acquire the
-  // bias in the current epoch. In other words, we allow transfer of
-  // the bias from one thread to another directly in this situation.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  load_prototype_header(tmp_reg, obj_reg);
-  orq(tmp_reg, r15_thread);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgq(tmp_reg, Address(obj_reg, 0));
-  // If the biasing toward our thread failed, then another thread
-  // succeeded in biasing it toward itself and we need to revoke that
-  // bias. The revocation will occur in the runtime in the slow case.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
-  }
-  if (slow_case != NULL) {
-    jcc(Assembler::notZero, *slow_case);
-  }
-  jmp(done);
-
-  bind(try_revoke_bias);
-  // The prototype mark in the klass doesn't have the bias bit set any
-  // more, indicating that objects of this data type are not supposed
-  // to be biased any more. We are going to try to reset the mark of
-  // this object to the prototype value and fall through to the
-  // CAS-based locking scheme. Note that if our CAS fails, it means
-  // that another thread raced us for the privilege of revoking the
-  // bias of this particular object, so it's okay to continue in the
-  // normal locking code.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  load_prototype_header(tmp_reg, obj_reg);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgq(tmp_reg, Address(obj_reg, 0));
-  // Fall through to the normal CAS-based lock, because no matter what
-  // the result of the above CAS, some thread must have succeeded in
-  // removing the bias bit from the object's header.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
-  }
-
-  bind(cas_label);
-
-  return null_check_offset;
-}
-
 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
   Label L, E;
 
@@ -1360,9 +990,16 @@
 
 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
   pushf();
-  if (os::is_MP())
-    lock();
-  incrementl(counter_addr);
+  if (reachable(counter_addr)) {
+    if (os::is_MP())
+      lock();
+    incrementl(as_Address(counter_addr));
+  } else {
+    lea(rscratch1, counter_addr);
+    if (os::is_MP())
+      lock();
+    incrementl(Address(rscratch1, 0));
+  }
   popf();
 }
 
@@ -1393,6 +1030,234 @@
   }
 }
 
+int MacroAssembler::biased_locking_enter(Register lock_reg,
+                                         Register obj_reg,
+                                         Register swap_reg,
+                                         Register tmp_reg,
+                                         bool swap_reg_contains_mark,
+                                         Label& done,
+                                         Label* slow_case,
+                                         BiasedLockingCounters* counters) {
+  assert(UseBiasedLocking, "why call this otherwise?");
+  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
+  LP64_ONLY( assert(tmp_reg != noreg, "tmp_reg must be supplied"); )
+  bool need_tmp_reg = false;
+  if (tmp_reg == noreg) {
+    need_tmp_reg = true;
+    tmp_reg = lock_reg;
+    assert_different_registers(lock_reg, obj_reg, swap_reg);
+  } else {
+    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
+  }
+  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
+  Address saved_mark_addr(lock_reg, 0);
+
+  if (PrintBiasedLockingStatistics && counters == NULL) {
+    counters = BiasedLocking::counters();
+  }
+  // Biased locking
+  // See whether the lock is currently biased toward our thread and
+  // whether the epoch is still valid
+  // Note that the runtime guarantees sufficient alignment of JavaThread
+  // pointers to allow age to be placed into low bits
+  // First check to see whether biasing is even enabled for this object
+  Label cas_label;
+  int null_check_offset = -1;
+  if (!swap_reg_contains_mark) {
+    null_check_offset = offset();
+    movptr(swap_reg, mark_addr);
+  }
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  movptr(tmp_reg, swap_reg);
+  andptr(tmp_reg, markOopDesc::biased_lock_mask_in_place);
+  cmpptr(tmp_reg, markOopDesc::biased_lock_pattern);
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  jcc(Assembler::notEqual, cas_label);
+  // The bias pattern is present in the object's header. Need to check
+  // whether the bias owner and the epoch are both still current.
+#ifndef _LP64
+  // Note that because there is no current thread register on x86_32 we
+  // need to store off the mark word we read out of the object to
+  // avoid reloading it and needing to recheck invariants below. This
+  // store is unfortunate but it makes the overall code shorter and
+  // simpler.
+  movptr(saved_mark_addr, swap_reg);
+#endif
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  if (swap_reg_contains_mark) {
+    null_check_offset = offset();
+  }
+  load_prototype_header(tmp_reg, obj_reg);
+#ifdef _LP64
+  orptr(tmp_reg, r15_thread);
+  xorptr(tmp_reg, swap_reg);
+  Register header_reg = tmp_reg;
+#else
+  xorptr(tmp_reg, swap_reg);
+  get_thread(swap_reg);
+  xorptr(swap_reg, tmp_reg);
+  Register header_reg = swap_reg;
+#endif
+  andptr(header_reg, ~((int) markOopDesc::age_mask_in_place));
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->biased_lock_entry_count_addr()));
+  }
+  jcc(Assembler::equal, done);
+
+  Label try_revoke_bias;
+  Label try_rebias;
+
+  // At this point we know that the header has the bias pattern and
+  // that we are not the bias owner in the current epoch. We need to
+  // figure out more details about the state of the header in order to
+  // know what operations can be legally performed on the object's
+  // header.
+
+  // If the low three bits in the xor result aren't clear, that means
+  // the prototype header is no longer biased and we have to revoke
+  // the bias on this object.
+  testptr(header_reg, markOopDesc::biased_lock_mask_in_place);
+  jccb(Assembler::notZero, try_revoke_bias);
+
+  // Biasing is still enabled for this data type. See whether the
+  // epoch of the current bias is still valid, meaning that the epoch
+  // bits of the mark word are equal to the epoch bits of the
+  // prototype header. (Note that the prototype header's epoch bits
+  // only change at a safepoint.) If not, attempt to rebias the object
+  // toward the current thread. Note that we must be absolutely sure
+  // that the current epoch is invalid in order to do this because
+  // otherwise the manipulations it performs on the mark word are
+  // illegal.
+  testptr(header_reg, markOopDesc::epoch_mask_in_place);
+  jccb(Assembler::notZero, try_rebias);
+
+  // The epoch of the current bias is still valid but we know nothing
+  // about the owner; it might be set or it might be clear. Try to
+  // acquire the bias of the object using an atomic operation. If this
+  // fails we will go in to the runtime to revoke the object's bias.
+  // Note that we first construct the presumed unbiased header so we
+  // don't accidentally blow away another thread's valid bias.
+  NOT_LP64( movptr(swap_reg, saved_mark_addr); )
+  andptr(swap_reg,
+         markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+#ifdef _LP64
+  movptr(tmp_reg, swap_reg);
+  orptr(tmp_reg, r15_thread);
+#else
+  get_thread(tmp_reg);
+  orptr(tmp_reg, swap_reg);
+#endif
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  // If the biasing toward our thread failed, this means that
+  // another thread succeeded in biasing it toward itself and we
+  // need to revoke that bias. The revocation will occur in the
+  // interpreter runtime in the slow case.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
+  }
+  if (slow_case != NULL) {
+    jcc(Assembler::notZero, *slow_case);
+  }
+  jmp(done);
+
+  bind(try_rebias);
+  // At this point we know the epoch has expired, meaning that the
+  // current "bias owner", if any, is actually invalid. Under these
+  // circumstances _only_, we are allowed to use the current header's
+  // value as the comparison value when doing the cas to acquire the
+  // bias in the current epoch. In other words, we allow transfer of
+  // the bias from one thread to another directly in this situation.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  load_prototype_header(tmp_reg, obj_reg);
+#ifdef _LP64
+  orptr(tmp_reg, r15_thread);
+#else
+  get_thread(swap_reg);
+  orptr(tmp_reg, swap_reg);
+  movptr(swap_reg, saved_mark_addr);
+#endif
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  // If the biasing toward our thread failed, then another thread
+  // succeeded in biasing it toward itself and we need to revoke that
+  // bias. The revocation will occur in the runtime in the slow case.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
+  }
+  if (slow_case != NULL) {
+    jcc(Assembler::notZero, *slow_case);
+  }
+  jmp(done);
+
+  bind(try_revoke_bias);
+  // The prototype mark in the klass doesn't have the bias bit set any
+  // more, indicating that objects of this data type are not supposed
+  // to be biased any more. We are going to try to reset the mark of
+  // this object to the prototype value and fall through to the
+  // CAS-based locking scheme. Note that if our CAS fails, it means
+  // that another thread raced us for the privilege of revoking the
+  // bias of this particular object, so it's okay to continue in the
+  // normal locking code.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  NOT_LP64( movptr(swap_reg, saved_mark_addr); )
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  load_prototype_header(tmp_reg, obj_reg);
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  // Fall through to the normal CAS-based lock, because no matter what
+  // the result of the above CAS, some thread must have succeeded in
+  // removing the bias bit from the object's header.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
+  }
+
+  bind(cas_label);
+
+  return null_check_offset;
+}
+
 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
   assert(UseBiasedLocking, "why call this otherwise?");
 
@@ -1408,6 +1273,620 @@
   jcc(Assembler::equal, done);
 }
 
+#ifdef COMPILER2
+// Fast_Lock and Fast_Unlock used by C2
+
+// Because the transitions from emitted code to the runtime
+// monitorenter/exit helper stubs are so slow it's critical that
+// we inline both the stack-locking fast-path and the inflated fast path.
+//
+// See also: cmpFastLock and cmpFastUnlock.
+//
+// What follows is a specialized inline transliteration of the code
+// in slow_enter() and slow_exit().  If we're concerned about I$ bloat
+// another option would be to emit TrySlowEnter and TrySlowExit methods
+// at startup-time.  These methods would accept arguments as
+// (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
+// indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
+// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
+// In practice, however, the # of lock sites is bounded and is usually small.
+// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
+// if the processor uses simple bimodal branch predictors keyed by EIP,
+// since the helper routines would be called from multiple synchronization
+// sites.
+//
+// An even better approach would be to write "MonitorEnter()" and "MonitorExit()"
+// in java - using j.u.c and unsafe - and just bind the lock and unlock sites
+// to those specialized methods.  That'd give us a mostly platform-independent
+// implementation that the JITs could optimize and inline at their pleasure.
+// Done correctly, the only time we'd need to cross to native code would be
+// to park() or unpark() threads.  We'd also need a few more unsafe operators
+// to (a) prevent compiler-JIT reordering of non-volatile accesses, and
+// (b) explicit barriers or fence operations.
+//
+// TODO:
+//
+// *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
+//    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
+//    Given TLAB allocation, Self is usually manifested in a register, so passing it into
+//    the lock operators would typically be faster than reifying Self.
+//
+// *  Ideally I'd define the primitives as:
+//       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
+//       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
+//    Unfortunately ADLC bugs prevent us from expressing the ideal form.
+//    Instead, we're stuck with the rather awkward and brittle register assignments below.
+//    Furthermore the register assignments are overconstrained, possibly resulting in
+//    sub-optimal code near the synchronization site.
+//
+// *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
+//    Alternately, use a better sp-proximity test.
+//
+// *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
+//    Either one is sufficient to uniquely identify a thread.
+//    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
+//
+// *  Intrinsify notify() and notifyAll() for the common cases where the
+//    object is locked by the calling thread but the waitlist is empty.
+//    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
+//
+// *  use jccb and jmpb instead of jcc and jmp to improve code density.
+//    But beware of excessive branch density on AMD Opterons.
+//
+// *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
+//    or failure of the fast-path.  If the fast-path fails then we pass
+//    control to the slow-path, typically in C.  In Fast_Lock and
+//    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
+//    will emit a conditional branch immediately after the node.
+//    So we have branches to branches and lots of ICC.ZF games.
+//    Instead, it might be better to have C2 pass a "FailureLabel"
+//    into Fast_Lock and Fast_Unlock.  In the case of success, control
+//    will drop through the node.  ICC.ZF is undefined at exit.
+//    In the case of failure, the node will branch directly to the
+//    FailureLabel
+
+
+// obj: object to lock
+// box: on-stack box address (displaced header location) - KILLED
+// rax: tmp -- KILLED
+// scr: tmp -- KILLED
+void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg, BiasedLockingCounters* counters) {
+  // Ensure the register assignments are disjoint
+  guarantee (objReg != boxReg, "");
+  guarantee (objReg != tmpReg, "");
+  guarantee (objReg != scrReg, "");
+  guarantee (boxReg != tmpReg, "");
+  guarantee (boxReg != scrReg, "");
+  guarantee (tmpReg == rax, "");
+
+  if (counters != NULL) {
+    atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()));
+  }
+  if (EmitSync & 1) {
+      // set box->dhw = unused_mark (3)
+      // Force all sync thru slow-path: slow_enter() and slow_exit()
+      movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
+      cmpptr (rsp, (int32_t)NULL_WORD);
+  } else
+  if (EmitSync & 2) {
+      Label DONE_LABEL ;
+      if (UseBiasedLocking) {
+         // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
+         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
+      }
+
+      movptr(tmpReg, Address(objReg, 0));           // fetch markword
+      orptr (tmpReg, 0x1);
+      movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
+      if (os::is_MP()) {
+        lock();
+      }
+      cmpxchgptr(boxReg, Address(objReg, 0));       // Updates tmpReg
+      jccb(Assembler::equal, DONE_LABEL);
+      // Recursive locking
+      subptr(tmpReg, rsp);
+      andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
+      movptr(Address(boxReg, 0), tmpReg);
+      bind(DONE_LABEL);
+  } else {
+    // Possible cases that we'll encounter in fast_lock
+    // ------------------------------------------------
+    // * Inflated
+    //    -- unlocked
+    //    -- Locked
+    //       = by self
+    //       = by other
+    // * biased
+    //    -- by Self
+    //    -- by other
+    // * neutral
+    // * stack-locked
+    //    -- by self
+    //       = sp-proximity test hits
+    //       = sp-proximity test generates false-negative
+    //    -- by other
+    //
+
+    Label IsInflated, DONE_LABEL;
+
+    // it's stack-locked, biased or neutral
+    // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
+    // order to reduce the number of conditional branches in the most common cases.
+    // Beware -- there's a subtle invariant that fetch of the markword
+    // at [FETCH], below, will never observe a biased encoding (*101b).
+    // If this invariant is not held we risk exclusion (safety) failure.
+    if (UseBiasedLocking && !UseOptoBiasInlining) {
+      biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters);
+    }
+
+    movptr(tmpReg, Address(objReg, 0));          // [FETCH]
+    testl (tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
+    jccb  (Assembler::notZero, IsInflated);
+
+    // Attempt stack-locking ...
+    orptr (tmpReg, 0x1);
+    movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
+    if (os::is_MP()) {
+      lock();
+    }
+    cmpxchgptr(boxReg, Address(objReg, 0));      // Updates tmpReg
+    if (counters != NULL) {
+      cond_inc32(Assembler::equal,
+                 ExternalAddress((address)counters->fast_path_entry_count_addr()));
+    }
+    jccb(Assembler::equal, DONE_LABEL);
+
+    // Recursive locking
+    subptr(tmpReg, rsp);
+    andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
+    movptr(Address(boxReg, 0), tmpReg);
+    if (counters != NULL) {
+      cond_inc32(Assembler::equal,
+                 ExternalAddress((address)counters->fast_path_entry_count_addr()));
+    }
+    jmpb(DONE_LABEL);
+
+    bind(IsInflated);
+#ifndef _LP64
+    // The object is inflated.
+    //
+    // TODO-FIXME: eliminate the ugly use of manifest constants:
+    //   Use markOopDesc::monitor_value instead of "2".
+    //   use markOop::unused_mark() instead of "3".
+    // The tmpReg value is an objectMonitor reference ORed with
+    // markOopDesc::monitor_value (2).   We can either convert tmpReg to an
+    // objectmonitor pointer by masking off the "2" bit or we can just
+    // use tmpReg as an objectmonitor pointer but bias the objectmonitor
+    // field offsets with "-2" to compensate for and annul the low-order tag bit.
+    //
+    // I use the latter as it avoids AGI stalls.
+    // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
+    // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
+    //
+    #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
+
+    // boxReg refers to the on-stack BasicLock in the current frame.
+    // We'd like to write:
+    //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
+    // This is convenient but results in a ST-before-CAS penalty.  The following CAS suffers
+    // additional latency as we have another ST in the store buffer that must drain.
+
+    if (EmitSync & 8192) {
+       movptr(Address(boxReg, 0), 3);            // results in ST-before-CAS penalty
+       get_thread (scrReg);
+       movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
+       movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
+       if (os::is_MP()) {
+         lock();
+       }
+       cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    } else
+    if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
+       movptr(scrReg, boxReg);
+       movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
+
+       // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
+       if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+          // prefetchw [eax + Offset(_owner)-2]
+          prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       }
+
+       if ((EmitSync & 64) == 0) {
+         // Optimistic form: consider XORL tmpReg,tmpReg
+         movptr(tmpReg, NULL_WORD);
+       } else {
+         // Can suffer RTS->RTO upgrades on shared or cold $ lines
+         // Test-And-CAS instead of CAS
+         movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));   // rax, = m->_owner
+         testptr(tmpReg, tmpReg);                   // Locked ?
+         jccb  (Assembler::notZero, DONE_LABEL);
+       }
+
+       // Appears unlocked - try to swing _owner from null to non-null.
+       // Ideally, I'd manifest "Self" with get_thread and then attempt
+       // to CAS the register containing Self into m->Owner.
+       // But we don't have enough registers, so instead we can either try to CAS
+       // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
+       // we later store "Self" into m->Owner.  Transiently storing a stack address
+       // (rsp or the address of the box) into  m->owner is harmless.
+       // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
+       if (os::is_MP()) {
+         lock();
+       }
+       cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
+       jccb  (Assembler::notZero, DONE_LABEL);
+       get_thread (scrReg);                    // beware: clobbers ICCs
+       movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg);
+       xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
+
+       // If the CAS fails we can either retry or pass control to the slow-path.
+       // We use the latter tactic.
+       // Pass the CAS result in the icc.ZFlag into DONE_LABEL
+       // If the CAS was successful ...
+       //   Self has acquired the lock
+       //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
+       // Intentional fall-through into DONE_LABEL ...
+    } else {
+       movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark()));  // results in ST-before-CAS penalty
+       movptr(boxReg, tmpReg);
+
+       // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
+       if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+          // prefetchw [eax + Offset(_owner)-2]
+          prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       }
+
+       if ((EmitSync & 64) == 0) {
+         // Optimistic form
+         xorptr  (tmpReg, tmpReg);
+       } else {
+         // Can suffer RTS->RTO upgrades on shared or cold $ lines
+         movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));   // rax, = m->_owner
+         testptr(tmpReg, tmpReg);                   // Locked ?
+         jccb  (Assembler::notZero, DONE_LABEL);
+       }
+
+       // Appears unlocked - try to swing _owner from null to non-null.
+       // Use either "Self" (in scr) or rsp as thread identity in _owner.
+       // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
+       get_thread (scrReg);
+       if (os::is_MP()) {
+         lock();
+       }
+       cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+
+       // If the CAS fails we can either retry or pass control to the slow-path.
+       // We use the latter tactic.
+       // Pass the CAS result in the icc.ZFlag into DONE_LABEL
+       // If the CAS was successful ...
+       //   Self has acquired the lock
+       //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
+       // Intentional fall-through into DONE_LABEL ...
+    }
+#else // _LP64
+    // It's inflated
+
+    // TODO: someday avoid the ST-before-CAS penalty by
+    // relocating (deferring) the following ST.
+    // We should also think about trying a CAS without having
+    // fetched _owner.  If the CAS is successful we may
+    // avoid an RTS->RTO upgrade on the $line.
+
+    // Without cast to int32_t a movptr will destroy r10 which is typically obj
+    movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
+
+    mov    (boxReg, tmpReg);
+    movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    testptr(tmpReg, tmpReg);
+    jccb   (Assembler::notZero, DONE_LABEL);
+
+    // It's inflated and appears unlocked
+    if (os::is_MP()) {
+      lock();
+    }
+    cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    // Intentional fall-through into DONE_LABEL ...
+
+#endif
+
+    // DONE_LABEL is a hot target - we'd really like to place it at the
+    // start of cache line by padding with NOPs.
+    // See the AMD and Intel software optimization manuals for the
+    // most efficient "long" NOP encodings.
+    // Unfortunately none of our alignment mechanisms suffice.
+    bind(DONE_LABEL);
+
+    // At DONE_LABEL the icc ZFlag is set as follows ...
+    // Fast_Unlock uses the same protocol.
+    // ZFlag == 1 -> Success
+    // ZFlag == 0 -> Failure - force control through the slow-path
+  }
+}
+
+// obj: object to unlock
+// box: box address (displaced header location), killed.  Must be EAX.
+// tmp: killed, cannot be obj nor box.
+//
+// Some commentary on balanced locking:
+//
+// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
+// Methods that don't have provably balanced locking are forced to run in the
+// interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
+// The interpreter provides two properties:
+// I1:  At return-time the interpreter automatically and quietly unlocks any
+//      objects acquired by the current activation (frame).  Recall that the
+//      interpreter maintains an on-stack list of locks currently held by
+//      a frame.
+// I2:  If a method attempts to unlock an object that is not held by the
+//      frame, the interpreter throws IMSX.
+//
+// Let's say A(), which has provably balanced locking, acquires O and then calls B().
+// B() doesn't have provably balanced locking so it runs in the interpreter.
+// Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
+// is still locked by A().
+//
+// The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
+// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
+// should not be unlocked by "normal" java-level locking and vice-versa.  The specification
+// doesn't specify what will occur if a program engages in such mixed-mode locking, however.
+
+void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
+  guarantee (objReg != boxReg, "");
+  guarantee (objReg != tmpReg, "");
+  guarantee (boxReg != tmpReg, "");
+  guarantee (boxReg == rax, "");
+
+  if (EmitSync & 4) {
+    // Disable - inhibit all inlining.  Force control through the slow-path
+    cmpptr (rsp, 0);
+  } else
+  if (EmitSync & 8) {
+    Label DONE_LABEL;
+    if (UseBiasedLocking) {
+       biased_locking_exit(objReg, tmpReg, DONE_LABEL);
+    }
+    // Classic stack-locking code ...
+    // Check whether the displaced header is 0
+    //(=> recursive unlock)
+    movptr(tmpReg, Address(boxReg, 0));
+    testptr(tmpReg, tmpReg);
+    jccb(Assembler::zero, DONE_LABEL);
+    // If not recursive lock, reset the header to displaced header
+    if (os::is_MP()) {
+      lock();
+    }
+    cmpxchgptr(tmpReg, Address(objReg, 0));   // Uses RAX which is box
+    bind(DONE_LABEL);
+  } else {
+    Label DONE_LABEL, Stacked, CheckSucc;
+
+    // Critically, the biased locking test must have precedence over
+    // and appear before the (box->dhw == 0) recursive stack-lock test.
+    if (UseBiasedLocking && !UseOptoBiasInlining) {
+       biased_locking_exit(objReg, tmpReg, DONE_LABEL);
+    }
+
+    cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
+    movptr(tmpReg, Address(objReg, 0));             // Examine the object's markword
+    jccb  (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
+
+    testptr(tmpReg, 0x02);                          // Inflated?
+    jccb  (Assembler::zero, Stacked);
+
+    // It's inflated.
+    // Despite our balanced locking property we still check that m->_owner == Self
+    // as java routines or native JNI code called by this thread might
+    // have released the lock.
+    // Refer to the comments in synchronizer.cpp for how we might encode extra
+    // state in _succ so we can avoid fetching EntryList|cxq.
+    //
+    // I'd like to add more cases in fast_lock() and fast_unlock() --
+    // such as recursive enter and exit -- but we have to be wary of
+    // I$ bloat, T$ effects and BP$ effects.
+    //
+    // If there's no contention try a 1-0 exit.  That is, exit without
+    // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
+    // we detect and recover from the race that the 1-0 exit admits.
+    //
+    // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
+    // before it STs null into _owner, releasing the lock.  Updates
+    // to data protected by the critical section must be visible before
+    // we drop the lock (and thus before any other thread could acquire
+    // the lock and observe the fields protected by the lock).
+    // IA32's memory-model is SPO, so STs are ordered with respect to
+    // each other and there's no need for an explicit barrier (fence).
+    // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
+#ifndef _LP64
+    get_thread (boxReg);
+    if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+      // prefetchw [ebx + Offset(_owner)-2]
+      prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    }
+
+    // Note that we could employ various encoding schemes to reduce
+    // the number of loads below (currently 4) to just 2 or 3.
+    // Refer to the comments in synchronizer.cpp.
+    // In practice the chain of fetches doesn't seem to impact performance, however.
+    if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
+       // Attempt to reduce branch density - AMD's branch predictor.
+       xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+       orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
+       orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+       jccb  (Assembler::notZero, DONE_LABEL);
+       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+       jmpb  (DONE_LABEL);
+    } else {
+       xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+       jccb  (Assembler::notZero, DONE_LABEL);
+       movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
+       orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+       jccb  (Assembler::notZero, CheckSucc);
+       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+       jmpb  (DONE_LABEL);
+    }
+
+    // The Following code fragment (EmitSync & 65536) improves the performance of
+    // contended applications and contended synchronization microbenchmarks.
+    // Unfortunately the emission of the code - even though not executed - causes regressions
+    // in scimark and jetstream, evidently because of $ effects.  Replacing the code
+    // with an equal number of never-executed NOPs results in the same regression.
+    // We leave it off by default.
+
+    if ((EmitSync & 65536) != 0) {
+       Label LSuccess, LGoSlowPath ;
+
+       bind  (CheckSucc);
+
+       // Optional pre-test ... it's safe to elide this
+       if ((EmitSync & 16) == 0) {
+          cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+          jccb  (Assembler::zero, LGoSlowPath);
+       }
+
+       // We have a classic Dekker-style idiom:
+       //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
+       // There are a number of ways to implement the barrier:
+       // (1) lock:andl &m->_owner, 0
+       //     is fast, but masm doesn't currently support the "ANDL M,IMM32" form.
+       //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
+       //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
+       // (2) If supported, an explicit MFENCE is appealing.
+       //     In older IA32 processors MFENCE is slower than lock:add or xchg
+       //     particularly if the write-buffer is full as might be the case if
+       //     stores closely precede the fence or fence-equivalent instruction.
+       //     In more modern implementations MFENCE appears faster, however.
+       // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
+       //     The $lines underlying the top-of-stack should be in M-state.
+       //     The locked add instruction is serializing, of course.
+       // (4) Use xchg, which is serializing
+       //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
+       // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
+       //     The integer condition codes will tell us if succ was 0.
+       //     Since _succ and _owner should reside in the same $line and
+       //     we just stored into _owner, it's likely that the $line
+       //     remains in M-state for the lock:orl.
+       //
+       // We currently use (3), although it's likely that switching to (2)
+       // is correct for the future.
+
+       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+       if (os::is_MP()) {
+          if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
+            mfence();
+          } else {
+            lock (); addptr(Address(rsp, 0), 0);
+          }
+       }
+       // Ratify _succ remains non-null
+       cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0);
+       jccb  (Assembler::notZero, LSuccess);
+
+       xorptr(boxReg, boxReg);                  // box is really EAX
+       if (os::is_MP()) { lock(); }
+       cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       jccb  (Assembler::notEqual, LSuccess);
+       // Since we're low on registers we installed rsp as a placeholder in _owner.
+       // Now install Self over rsp.  This is safe as we're transitioning from
+       // non-null to non-null.
+       get_thread (boxReg);
+       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg);
+       // Intentional fall-through into LGoSlowPath ...
+
+       bind  (LGoSlowPath);
+       orptr(boxReg, 1);                      // set ICC.ZF=0 to indicate failure
+       jmpb  (DONE_LABEL);
+
+       bind  (LSuccess);
+       xorptr(boxReg, boxReg);                 // set ICC.ZF=1 to indicate success
+       jmpb  (DONE_LABEL);
+    }
+
+    bind (Stacked);
+    // It's not inflated and it's not recursively stack-locked and it's not biased.
+    // It must be stack-locked.
+    // Try to reset the header to displaced header.
+    // The "box" value on the stack is stable, so we can reload
+    // and be assured we observe the same value as above.
+    movptr(tmpReg, Address(boxReg, 0));
+    if (os::is_MP()) {
+      lock();
+    }
+    cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
+    // Intentional fall-through into DONE_LABEL
+
+    // DONE_LABEL is a hot target - we'd really like to place it at the
+    // start of cache line by padding with NOPs.
+    // See the AMD and Intel software optimization manuals for the
+    // most efficient "long" NOP encodings.
+    // Unfortunately none of our alignment mechanisms suffice.
+    if ((EmitSync & 65536) == 0) {
+       bind (CheckSucc);
+    }
+#else // _LP64
+    // It's inflated
+    movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    xorptr(boxReg, r15_thread);
+    orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+    jccb  (Assembler::notZero, DONE_LABEL);
+    movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+    orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
+    jccb  (Assembler::notZero, CheckSucc);
+    movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
+    jmpb  (DONE_LABEL);
+
+    if ((EmitSync & 65536) == 0) {
+      Label LSuccess, LGoSlowPath ;
+      bind  (CheckSucc);
+      cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+      jccb  (Assembler::zero, LGoSlowPath);
+
+      // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
+      // explicit ST;MEMBAR combination, but masm doesn't currently support
+      // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
+      // are all faster when the write buffer is populated.
+      movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
+      if (os::is_MP()) {
+         lock (); addl (Address(rsp, 0), 0);
+      }
+      cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+      jccb  (Assembler::notZero, LSuccess);
+
+      movptr (boxReg, (int32_t)NULL_WORD);                   // box is really EAX
+      if (os::is_MP()) { lock(); }
+      cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+      jccb  (Assembler::notEqual, LSuccess);
+      // Intentional fall-through into slow-path
+
+      bind  (LGoSlowPath);
+      orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
+      jmpb  (DONE_LABEL);
+
+      bind  (LSuccess);
+      testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
+      jmpb  (DONE_LABEL);
+    }
+
+    bind  (Stacked);
+    movptr(tmpReg, Address (boxReg, 0));      // re-fetch
+    if (os::is_MP()) { lock(); }
+    cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
+
+    if (EmitSync & 65536) {
+       bind (CheckSucc);
+    }
+#endif
+    bind(DONE_LABEL);
+    // Avoid branch to branch on AMD processors
+    if (EmitSync & 32768) {
+       nop();
+    }
+  }
+}
+#endif // COMPILER2
+
 void MacroAssembler::c2bool(Register x) {
   // implements x == 0 ? 0 : 1
   // note: must only look at least-significant byte of x
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp	Thu Mar 13 09:57:31 2014 -0700
@@ -651,7 +651,12 @@
                            Label& done, Label* slow_case = NULL,
                            BiasedLockingCounters* counters = NULL);
   void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
-
+#ifdef COMPILER2
+  // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
+  // See the full description in macroAssembler_x86.cpp.
+  void fast_lock(Register obj, Register box, Register tmp, Register scr, BiasedLockingCounters* counters);
+  void fast_unlock(Register obj, Register box, Register tmp);
+#endif
 
   Condition negate_condition(Condition cond);
 
--- a/src/cpu/x86/vm/x86_32.ad	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/cpu/x86/vm/x86_32.ad	Thu Mar 13 09:57:31 2014 -0700
@@ -2910,542 +2910,6 @@
     emit_d8    (cbuf,0 );
   %}
 
-
-  // Because the transitions from emitted code to the runtime
-  // monitorenter/exit helper stubs are so slow it's critical that
-  // we inline both the stack-locking fast-path and the inflated fast path.
-  //
-  // See also: cmpFastLock and cmpFastUnlock.
-  //
-  // What follows is a specialized inline transliteration of the code
-  // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
-  // another option would be to emit TrySlowEnter and TrySlowExit methods
-  // at startup-time.  These methods would accept arguments as
-  // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
-  // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
-  // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
-  // In practice, however, the # of lock sites is bounded and is usually small.
-  // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
-  // if the processor uses simple bimodal branch predictors keyed by EIP
-  // Since the helper routines would be called from multiple synchronization
-  // sites.
-  //
-  // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
-  // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
-  // to those specialized methods.  That'd give us a mostly platform-independent
-  // implementation that the JITs could optimize and inline at their pleasure.
-  // Done correctly, the only time we'd need to cross to native could would be
-  // to park() or unpark() threads.  We'd also need a few more unsafe operators
-  // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
-  // (b) explicit barriers or fence operations.
-  //
-  // TODO:
-  //
-  // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
-  //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
-  //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
-  //    the lock operators would typically be faster than reifying Self.
-  //
-  // *  Ideally I'd define the primitives as:
-  //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
-  //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
-  //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
-  //    Instead, we're stuck with a rather awkward and brittle register assignments below.
-  //    Furthermore the register assignments are overconstrained, possibly resulting in
-  //    sub-optimal code near the synchronization site.
-  //
-  // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
-  //    Alternately, use a better sp-proximity test.
-  //
-  // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
-  //    Either one is sufficient to uniquely identify a thread.
-  //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
-  //
-  // *  Intrinsify notify() and notifyAll() for the common cases where the
-  //    object is locked by the calling thread but the waitlist is empty.
-  //    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
-  //
-  // *  use jccb and jmpb instead of jcc and jmp to improve code density.
-  //    But beware of excessive branch density on AMD Opterons.
-  //
-  // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
-  //    or failure of the fast-path.  If the fast-path fails then we pass
-  //    control to the slow-path, typically in C.  In Fast_Lock and
-  //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
-  //    will emit a conditional branch immediately after the node.
-  //    So we have branches to branches and lots of ICC.ZF games.
-  //    Instead, it might be better to have C2 pass a "FailureLabel"
-  //    into Fast_Lock and Fast_Unlock.  In the case of success, control
-  //    will drop through the node.  ICC.ZF is undefined at exit.
-  //    In the case of failure, the node will branch directly to the
-  //    FailureLabel
-
-
-  // obj: object to lock
-  // box: on-stack box address (displaced header location) - KILLED
-  // rax,: tmp -- KILLED
-  // scr: tmp -- KILLED
-  enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
-
-    Register objReg = as_Register($obj$$reg);
-    Register boxReg = as_Register($box$$reg);
-    Register tmpReg = as_Register($tmp$$reg);
-    Register scrReg = as_Register($scr$$reg);
-
-    // Ensure the register assignents are disjoint
-    guarantee (objReg != boxReg, "") ;
-    guarantee (objReg != tmpReg, "") ;
-    guarantee (objReg != scrReg, "") ;
-    guarantee (boxReg != tmpReg, "") ;
-    guarantee (boxReg != scrReg, "") ;
-    guarantee (tmpReg == as_Register(EAX_enc), "") ;
-
-    MacroAssembler masm(&cbuf);
-
-    if (_counters != NULL) {
-      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
-    }
-    if (EmitSync & 1) {
-        // set box->dhw = unused_mark (3)
-        // Force all sync thru slow-path: slow_enter() and slow_exit() 
-        masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;             
-        masm.cmpptr (rsp, (int32_t)0) ;                        
-    } else 
-    if (EmitSync & 2) { 
-        Label DONE_LABEL ;           
-        if (UseBiasedLocking) {
-           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
-           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
-        }
-
-        masm.movptr(tmpReg, Address(objReg, 0)) ;          // fetch markword 
-        masm.orptr (tmpReg, 0x1);
-        masm.movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS 
-        if (os::is_MP()) { masm.lock();  }
-        masm.cmpxchgptr(boxReg, Address(objReg, 0));          // Updates tmpReg
-        masm.jcc(Assembler::equal, DONE_LABEL);
-        // Recursive locking
-        masm.subptr(tmpReg, rsp);
-        masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
-        masm.movptr(Address(boxReg, 0), tmpReg);
-        masm.bind(DONE_LABEL) ; 
-    } else {  
-      // Possible cases that we'll encounter in fast_lock 
-      // ------------------------------------------------
-      // * Inflated
-      //    -- unlocked
-      //    -- Locked
-      //       = by self
-      //       = by other
-      // * biased
-      //    -- by Self
-      //    -- by other
-      // * neutral
-      // * stack-locked
-      //    -- by self
-      //       = sp-proximity test hits
-      //       = sp-proximity test generates false-negative
-      //    -- by other
-      //
-
-      Label IsInflated, DONE_LABEL, PopDone ;
-
-      // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
-      // order to reduce the number of conditional branches in the most common cases.
-      // Beware -- there's a subtle invariant that fetch of the markword
-      // at [FETCH], below, will never observe a biased encoding (*101b).
-      // If this invariant is not held we risk exclusion (safety) failure.
-      if (UseBiasedLocking && !UseOptoBiasInlining) {
-        masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
-      }
-
-      masm.movptr(tmpReg, Address(objReg, 0)) ;         // [FETCH]
-      masm.testptr(tmpReg, 0x02) ;                      // Inflated v (Stack-locked or neutral)
-      masm.jccb  (Assembler::notZero, IsInflated) ;
-
-      // Attempt stack-locking ...
-      masm.orptr (tmpReg, 0x1);
-      masm.movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
-      if (os::is_MP()) { masm.lock();  }
-      masm.cmpxchgptr(boxReg, Address(objReg, 0));           // Updates tmpReg
-      if (_counters != NULL) {
-        masm.cond_inc32(Assembler::equal,
-                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
-      }
-      masm.jccb (Assembler::equal, DONE_LABEL);
-
-      // Recursive locking
-      masm.subptr(tmpReg, rsp);
-      masm.andptr(tmpReg, 0xFFFFF003 );
-      masm.movptr(Address(boxReg, 0), tmpReg);
-      if (_counters != NULL) {
-        masm.cond_inc32(Assembler::equal,
-                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
-      }
-      masm.jmp  (DONE_LABEL) ;
-
-      masm.bind (IsInflated) ;
-
-      // The object is inflated.
-      //
-      // TODO-FIXME: eliminate the ugly use of manifest constants:
-      //   Use markOopDesc::monitor_value instead of "2".
-      //   use markOop::unused_mark() instead of "3".
-      // The tmpReg value is an objectMonitor reference ORed with
-      // markOopDesc::monitor_value (2).   We can either convert tmpReg to an
-      // objectmonitor pointer by masking off the "2" bit or we can just
-      // use tmpReg as an objectmonitor pointer but bias the objectmonitor
-      // field offsets with "-2" to compensate for and annul the low-order tag bit.
-      //
-      // I use the latter as it avoids AGI stalls.
-      // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
-      // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
-      //
-      #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
-
-      // boxReg refers to the on-stack BasicLock in the current frame.
-      // We'd like to write:
-      //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
-      // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
-      // additional latency as we have another ST in the store buffer that must drain.
-
-      if (EmitSync & 8192) { 
-         masm.movptr(Address(boxReg, 0), 3) ;            // results in ST-before-CAS penalty
-         masm.get_thread (scrReg) ; 
-         masm.movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2] 
-         masm.movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
-         if (os::is_MP()) { masm.lock(); } 
-         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
-      } else 
-      if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
-         masm.movptr(scrReg, boxReg) ; 
-         masm.movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2] 
-
-         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
-         if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
-            // prefetchw [eax + Offset(_owner)-2]
-            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
-         }
-
-         if ((EmitSync & 64) == 0) {
-           // Optimistic form: consider XORL tmpReg,tmpReg
-           masm.movptr(tmpReg, NULL_WORD) ; 
-         } else { 
-           // Can suffer RTS->RTO upgrades on shared or cold $ lines
-           // Test-And-CAS instead of CAS
-           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
-           masm.testptr(tmpReg, tmpReg) ;                   // Locked ? 
-           masm.jccb  (Assembler::notZero, DONE_LABEL) ;                   
-         }
-
-         // Appears unlocked - try to swing _owner from null to non-null.
-         // Ideally, I'd manifest "Self" with get_thread and then attempt
-         // to CAS the register containing Self into m->Owner.
-         // But we don't have enough registers, so instead we can either try to CAS
-         // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
-         // we later store "Self" into m->Owner.  Transiently storing a stack address
-         // (rsp or the address of the box) into  m->owner is harmless.
-         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
-         if (os::is_MP()) { masm.lock();  }
-         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
-         masm.movptr(Address(scrReg, 0), 3) ;          // box->_displaced_header = 3
-         masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
-         masm.get_thread (scrReg) ;                    // beware: clobbers ICCs
-         masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ; 
-         masm.xorptr(boxReg, boxReg) ;                 // set icc.ZFlag = 1 to indicate success
-                       
-         // If the CAS fails we can either retry or pass control to the slow-path.  
-         // We use the latter tactic.  
-         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
-         // If the CAS was successful ...
-         //   Self has acquired the lock
-         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
-         // Intentional fall-through into DONE_LABEL ...
-      } else {
-         masm.movptr(Address(boxReg, 0), 3) ;       // results in ST-before-CAS penalty
-         masm.movptr(boxReg, tmpReg) ; 
-
-         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
-         if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
-            // prefetchw [eax + Offset(_owner)-2]
-            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
-         }
-
-         if ((EmitSync & 64) == 0) {
-           // Optimistic form
-           masm.xorptr  (tmpReg, tmpReg) ; 
-         } else { 
-           // Can suffer RTS->RTO upgrades on shared or cold $ lines
-           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
-           masm.testptr(tmpReg, tmpReg) ;                   // Locked ? 
-           masm.jccb  (Assembler::notZero, DONE_LABEL) ;                   
-         }
-
-         // Appears unlocked - try to swing _owner from null to non-null.
-         // Use either "Self" (in scr) or rsp as thread identity in _owner.
-         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
-         masm.get_thread (scrReg) ;
-         if (os::is_MP()) { masm.lock(); }
-         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-
-         // If the CAS fails we can either retry or pass control to the slow-path.
-         // We use the latter tactic.
-         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
-         // If the CAS was successful ...
-         //   Self has acquired the lock
-         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
-         // Intentional fall-through into DONE_LABEL ...
-      }
-
-      // DONE_LABEL is a hot target - we'd really like to place it at the
-      // start of cache line by padding with NOPs.
-      // See the AMD and Intel software optimization manuals for the
-      // most efficient "long" NOP encodings.
-      // Unfortunately none of our alignment mechanisms suffice.
-      masm.bind(DONE_LABEL);
-
-      // Avoid branch-to-branch on AMD processors
-      // This appears to be superstition.
-      if (EmitSync & 32) masm.nop() ;
-
-
-      // At DONE_LABEL the icc ZFlag is set as follows ...
-      // Fast_Unlock uses the same protocol.
-      // ZFlag == 1 -> Success
-      // ZFlag == 0 -> Failure - force control through the slow-path
-    }
-  %}
-
-  // obj: object to unlock
-  // box: box address (displaced header location), killed.  Must be EAX.
-  // rbx,: killed tmp; cannot be obj nor box.
-  //
-  // Some commentary on balanced locking:
-  //
-  // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
-  // Methods that don't have provably balanced locking are forced to run in the
-  // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
-  // The interpreter provides two properties:
-  // I1:  At return-time the interpreter automatically and quietly unlocks any
-  //      objects acquired the current activation (frame).  Recall that the
-  //      interpreter maintains an on-stack list of locks currently held by
-  //      a frame.
-  // I2:  If a method attempts to unlock an object that is not held by the
-  //      the frame the interpreter throws IMSX.
-  //
-  // Lets say A(), which has provably balanced locking, acquires O and then calls B().
-  // B() doesn't have provably balanced locking so it runs in the interpreter.
-  // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
-  // is still locked by A().
-  //
-  // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
-  // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
-  // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
-  // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
-
-  enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
-
-    Register objReg = as_Register($obj$$reg);
-    Register boxReg = as_Register($box$$reg);
-    Register tmpReg = as_Register($tmp$$reg);
-
-    guarantee (objReg != boxReg, "") ;
-    guarantee (objReg != tmpReg, "") ;
-    guarantee (boxReg != tmpReg, "") ;
-    guarantee (boxReg == as_Register(EAX_enc), "") ;
-    MacroAssembler masm(&cbuf);
-
-    if (EmitSync & 4) {
-      // Disable - inhibit all inlining.  Force control through the slow-path
-      masm.cmpptr (rsp, 0) ; 
-    } else 
-    if (EmitSync & 8) {
-      Label DONE_LABEL ;
-      if (UseBiasedLocking) {
-         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
-      }
-      // classic stack-locking code ...
-      masm.movptr(tmpReg, Address(boxReg, 0)) ;
-      masm.testptr(tmpReg, tmpReg) ;
-      masm.jcc   (Assembler::zero, DONE_LABEL) ;
-      if (os::is_MP()) { masm.lock(); }
-      masm.cmpxchgptr(tmpReg, Address(objReg, 0));          // Uses EAX which is box
-      masm.bind(DONE_LABEL);
-    } else {
-      Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
-
-      // Critically, the biased locking test must have precedence over
-      // and appear before the (box->dhw == 0) recursive stack-lock test.
-      if (UseBiasedLocking && !UseOptoBiasInlining) {
-         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
-      }
-      
-      masm.cmpptr(Address(boxReg, 0), 0) ;            // Examine the displaced header
-      masm.movptr(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
-      masm.jccb  (Assembler::zero, DONE_LABEL) ;      // 0 indicates recursive stack-lock
-
-      masm.testptr(tmpReg, 0x02) ;                     // Inflated? 
-      masm.jccb  (Assembler::zero, Stacked) ;
-
-      masm.bind  (Inflated) ;
-      // It's inflated.
-      // Despite our balanced locking property we still check that m->_owner == Self
-      // as java routines or native JNI code called by this thread might
-      // have released the lock.
-      // Refer to the comments in synchronizer.cpp for how we might encode extra
-      // state in _succ so we can avoid fetching EntryList|cxq.
-      //
-      // I'd like to add more cases in fast_lock() and fast_unlock() --
-      // such as recursive enter and exit -- but we have to be wary of
-      // I$ bloat, T$ effects and BP$ effects.
-      //
-      // If there's no contention try a 1-0 exit.  That is, exit without
-      // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
-      // we detect and recover from the race that the 1-0 exit admits.
-      //
-      // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
-      // before it STs null into _owner, releasing the lock.  Updates
-      // to data protected by the critical section must be visible before
-      // we drop the lock (and thus before any other thread could acquire
-      // the lock and observe the fields protected by the lock).
-      // IA32's memory-model is SPO, so STs are ordered with respect to
-      // each other and there's no need for an explicit barrier (fence).
-      // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
-
-      masm.get_thread (boxReg) ;
-      if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
-        // prefetchw [ebx + Offset(_owner)-2]
-        masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
-      }
-
-      // Note that we could employ various encoding schemes to reduce
-      // the number of loads below (currently 4) to just 2 or 3.
-      // Refer to the comments in synchronizer.cpp.
-      // In practice the chain of fetches doesn't seem to impact performance, however.
-      if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
-         // Attempt to reduce branch density - AMD's branch predictor.
-         masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  
-         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
-         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
-         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
-         masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
-         masm.jmpb  (DONE_LABEL) ; 
-      } else { 
-         masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  
-         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
-         masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
-         masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
-         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
-         masm.jccb  (Assembler::notZero, CheckSucc) ; 
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
-         masm.jmpb  (DONE_LABEL) ; 
-      }
-
-      // The Following code fragment (EmitSync & 65536) improves the performance of
-      // contended applications and contended synchronization microbenchmarks.
-      // Unfortunately the emission of the code - even though not executed - causes regressions
-      // in scimark and jetstream, evidently because of $ effects.  Replacing the code
-      // with an equal number of never-executed NOPs results in the same regression.
-      // We leave it off by default.
-
-      if ((EmitSync & 65536) != 0) {
-         Label LSuccess, LGoSlowPath ;
-
-         masm.bind  (CheckSucc) ;
-
-         // Optional pre-test ... it's safe to elide this
-         if ((EmitSync & 16) == 0) { 
-            masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; 
-            masm.jccb  (Assembler::zero, LGoSlowPath) ; 
-         }
-
-         // We have a classic Dekker-style idiom:
-         //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
-         // There are a number of ways to implement the barrier:
-         // (1) lock:andl &m->_owner, 0
-         //     is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
-         //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
-         //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
-         // (2) If supported, an explicit MFENCE is appealing.
-         //     In older IA32 processors MFENCE is slower than lock:add or xchg
-         //     particularly if the write-buffer is full as might be the case if
-         //     if stores closely precede the fence or fence-equivalent instruction.
-         //     In more modern implementations MFENCE appears faster, however.
-         // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
-         //     The $lines underlying the top-of-stack should be in M-state.
-         //     The locked add instruction is serializing, of course.
-         // (4) Use xchg, which is serializing
-         //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
-         // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
-         //     The integer condition codes will tell us if succ was 0.
-         //     Since _succ and _owner should reside in the same $line and
-         //     we just stored into _owner, it's likely that the $line
-         //     remains in M-state for the lock:orl.
-         //
-         // We currently use (3), although it's likely that switching to (2)
-         // is correct for the future.
-            
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
-         if (os::is_MP()) { 
-            if (VM_Version::supports_sse2() && 1 == FenceInstruction) { 
-              masm.mfence();
-            } else { 
-              masm.lock () ; masm.addptr(Address(rsp, 0), 0) ; 
-            }
-         }
-         // Ratify _succ remains non-null
-         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; 
-         masm.jccb  (Assembler::notZero, LSuccess) ; 
-
-         masm.xorptr(boxReg, boxReg) ;                  // box is really EAX
-         if (os::is_MP()) { masm.lock(); }
-         masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
-         masm.jccb  (Assembler::notEqual, LSuccess) ;
-         // Since we're low on registers we installed rsp as a placeholding in _owner.
-         // Now install Self over rsp.  This is safe as we're transitioning from
-         // non-null to non=null
-         masm.get_thread (boxReg) ;
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
-         // Intentional fall-through into LGoSlowPath ...
-
-         masm.bind  (LGoSlowPath) ; 
-         masm.orptr(boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
-         masm.jmpb  (DONE_LABEL) ; 
-
-         masm.bind  (LSuccess) ; 
-         masm.xorptr(boxReg, boxReg) ;                 // set ICC.ZF=1 to indicate success
-         masm.jmpb  (DONE_LABEL) ; 
-      }
-
-      masm.bind (Stacked) ;
-      // It's not inflated and it's not recursively stack-locked and it's not biased.
-      // It must be stack-locked.
-      // Try to reset the header to displaced header.
-      // The "box" value on the stack is stable, so we can reload
-      // and be assured we observe the same value as above.
-      masm.movptr(tmpReg, Address(boxReg, 0)) ;
-      if (os::is_MP()) {   masm.lock();    }
-      masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
-      // Intention fall-thru into DONE_LABEL
-
-
-      // DONE_LABEL is a hot target - we'd really like to place it at the
-      // start of cache line by padding with NOPs.
-      // See the AMD and Intel software optimization manuals for the
-      // most efficient "long" NOP encodings.
-      // Unfortunately none of our alignment mechanisms suffice.
-      if ((EmitSync & 65536) == 0) {
-         masm.bind (CheckSucc) ;
-      }
-      masm.bind(DONE_LABEL);
-
-      // Avoid branch to branch on AMD processors
-      if (EmitSync & 32768) { masm.nop() ; }
-    }
-  %}
-
-
   enc_class enc_pop_rdx() %{
     emit_opcode(cbuf,0x5A);
   %}
@@ -13147,23 +12611,26 @@
 
 // inlined locking and unlocking
 
-
-instruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
-  match( Set cr (FastLock object box) );
-  effect( TEMP tmp, TEMP scr, USE_KILL box );
+instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
+  match(Set cr (FastLock object box));
+  effect(TEMP tmp, TEMP scr, USE_KILL box);
   ins_cost(300);
   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
-  ins_encode( Fast_Lock(object,box,tmp,scr) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
-  match( Set cr (FastUnlock object box) );
-  effect( TEMP tmp, USE_KILL box );
+  ins_encode %{
+    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, _counters);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
+  match(Set cr (FastUnlock object box));
+  effect(TEMP tmp, USE_KILL box);
   ins_cost(300);
   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
-  ins_encode( Fast_Unlock(object,box,tmp) );
-  ins_pipe( pipe_slow );
+  ins_encode %{
+    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
+  %}
+  ins_pipe(pipe_slow);
 %}
 
 
--- a/src/cpu/x86/vm/x86_64.ad	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Thu Mar 13 09:57:31 2014 -0700
@@ -2591,231 +2591,6 @@
   %}
 
 
-  // obj: object to lock
-  // box: box address (header location) -- killed
-  // tmp: rax -- killed
-  // scr: rbx -- killed
-  //
-  // What follows is a direct transliteration of fast_lock() and fast_unlock()
-  // from i486.ad.  See that file for comments.
-  // TODO: where possible switch from movq (r, 0) to movl(r,0) and
-  // use the shorter encoding.  (Movl clears the high-order 32-bits).
-
-
-  enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
-  %{
-    Register objReg = as_Register((int)$obj$$reg);
-    Register boxReg = as_Register((int)$box$$reg);
-    Register tmpReg = as_Register($tmp$$reg);
-    Register scrReg = as_Register($scr$$reg);
-    MacroAssembler masm(&cbuf);
-
-    // Verify uniqueness of register assignments -- necessary but not sufficient
-    assert (objReg != boxReg && objReg != tmpReg &&
-            objReg != scrReg && tmpReg != scrReg, "invariant") ;
-
-    if (_counters != NULL) {
-      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
-    }
-    if (EmitSync & 1) {
-        // Without cast to int32_t a movptr will destroy r10 which is typically obj
-        masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
-        masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
-    } else
-    if (EmitSync & 2) {
-        Label DONE_LABEL;
-        if (UseBiasedLocking) {
-           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
-          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
-        }
-        // QQQ was movl...
-        masm.movptr(tmpReg, 0x1);
-        masm.orptr(tmpReg, Address(objReg, 0));
-        masm.movptr(Address(boxReg, 0), tmpReg);
-        if (os::is_MP()) {
-          masm.lock();
-        }
-        masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
-        masm.jcc(Assembler::equal, DONE_LABEL);
-
-        // Recursive locking
-        masm.subptr(tmpReg, rsp);
-        masm.andptr(tmpReg, 7 - os::vm_page_size());
-        masm.movptr(Address(boxReg, 0), tmpReg);
-
-        masm.bind(DONE_LABEL);
-        masm.nop(); // avoid branch to branch
-    } else {
-        Label DONE_LABEL, IsInflated, Egress;
-
-        masm.movptr(tmpReg, Address(objReg, 0)) ;
-        masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
-        masm.jcc   (Assembler::notZero, IsInflated) ;
-
-        // it's stack-locked, biased or neutral
-        // TODO: optimize markword triage order to reduce the number of
-        // conditional branches in the most common cases.
-        // Beware -- there's a subtle invariant that fetch of the markword
-        // at [FETCH], below, will never observe a biased encoding (*101b).
-        // If this invariant is not held we'll suffer exclusion (safety) failure.
-
-        if (UseBiasedLocking && !UseOptoBiasInlining) {
-          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
-          masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
-        }
-
-        // was q will it destroy high?
-        masm.orl   (tmpReg, 1) ;
-        masm.movptr(Address(boxReg, 0), tmpReg) ;
-        if (os::is_MP()) { masm.lock(); }
-        masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
-        if (_counters != NULL) {
-           masm.cond_inc32(Assembler::equal,
-                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
-        }
-        masm.jcc   (Assembler::equal, DONE_LABEL);
-
-        // Recursive locking
-        masm.subptr(tmpReg, rsp);
-        masm.andptr(tmpReg, 7 - os::vm_page_size());
-        masm.movptr(Address(boxReg, 0), tmpReg);
-        if (_counters != NULL) {
-           masm.cond_inc32(Assembler::equal,
-                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
-        }
-        masm.jmp   (DONE_LABEL) ;
-
-        masm.bind  (IsInflated) ;
-        // It's inflated
-
-        // TODO: someday avoid the ST-before-CAS penalty by
-        // relocating (deferring) the following ST.
-        // We should also think about trying a CAS without having
-        // fetched _owner.  If the CAS is successful we may
-        // avoid an RTO->RTS upgrade on the $line.
-        // Without cast to int32_t a movptr will destroy r10 which is typically obj
-        masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
-
-        masm.mov    (boxReg, tmpReg) ;
-        masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-        masm.testptr(tmpReg, tmpReg) ;
-        masm.jcc    (Assembler::notZero, DONE_LABEL) ;
-
-        // It's inflated and appears unlocked
-        if (os::is_MP()) { masm.lock(); }
-        masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-        // Intentional fall-through into DONE_LABEL ...
-
-        masm.bind  (DONE_LABEL) ;
-        masm.nop   () ;                 // avoid jmp to jmp
-    }
-  %}
-
-  // obj: object to unlock
-  // box: box address (displaced header location), killed
-  // RBX: killed tmp; cannot be obj nor box
-  enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
-  %{
-
-    Register objReg = as_Register($obj$$reg);
-    Register boxReg = as_Register($box$$reg);
-    Register tmpReg = as_Register($tmp$$reg);
-    MacroAssembler masm(&cbuf);
-
-    if (EmitSync & 4) {
-       masm.cmpptr(rsp, 0) ;
-    } else
-    if (EmitSync & 8) {
-       Label DONE_LABEL;
-       if (UseBiasedLocking) {
-         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
-       }
-
-       // Check whether the displaced header is 0
-       //(=> recursive unlock)
-       masm.movptr(tmpReg, Address(boxReg, 0));
-       masm.testptr(tmpReg, tmpReg);
-       masm.jcc(Assembler::zero, DONE_LABEL);
-
-       // If not recursive lock, reset the header to displaced header
-       if (os::is_MP()) {
-         masm.lock();
-       }
-       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
-       masm.bind(DONE_LABEL);
-       masm.nop(); // avoid branch to branch
-    } else {
-       Label DONE_LABEL, Stacked, CheckSucc ;
-
-       if (UseBiasedLocking && !UseOptoBiasInlining) {
-         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
-       }
-
-       masm.movptr(tmpReg, Address(objReg, 0)) ;
-       masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
-       masm.jcc   (Assembler::zero, DONE_LABEL) ;
-       masm.testl (tmpReg, 0x02) ;
-       masm.jcc   (Assembler::zero, Stacked) ;
-
-       // It's inflated
-       masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-       masm.xorptr(boxReg, r15_thread) ;
-       masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
-       masm.jcc   (Assembler::notZero, DONE_LABEL) ;
-       masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
-       masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
-       masm.jcc   (Assembler::notZero, CheckSucc) ;
-       masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
-       masm.jmp   (DONE_LABEL) ;
-
-       if ((EmitSync & 65536) == 0) {
-         Label LSuccess, LGoSlowPath ;
-         masm.bind  (CheckSucc) ;
-         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
-         masm.jcc   (Assembler::zero, LGoSlowPath) ;
-
-         // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
-         // the explicit ST;MEMBAR combination, but masm doesn't currently support
-         // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
-         // are all faster when the write buffer is populated.
-         masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
-         if (os::is_MP()) {
-            masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
-         }
-         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
-         masm.jcc   (Assembler::notZero, LSuccess) ;
-
-         masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
-         if (os::is_MP()) { masm.lock(); }
-         masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
-         masm.jcc   (Assembler::notEqual, LSuccess) ;
-         // Intentional fall-through into slow-path
-
-         masm.bind  (LGoSlowPath) ;
-         masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
-         masm.jmp   (DONE_LABEL) ;
-
-         masm.bind  (LSuccess) ;
-         masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
-         masm.jmp   (DONE_LABEL) ;
-       }
-
-       masm.bind  (Stacked) ;
-       masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
-       if (os::is_MP()) { masm.lock(); }
-       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
-
-       if (EmitSync & 65536) {
-          masm.bind (CheckSucc) ;
-       }
-       masm.bind(DONE_LABEL);
-       if (EmitSync & 32768) {
-          masm.nop();                      // avoid branch to branch
-       }
-    }
-  %}
-
-
   enc_class enc_rethrow()
   %{
     cbuf.set_insts_mark();
@@ -11443,27 +11218,25 @@
 // ============================================================================
 // inlined locking and unlocking
 
-instruct cmpFastLock(rFlagsReg cr,
-                     rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
-%{
+instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
   match(Set cr (FastLock object box));
   effect(TEMP tmp, TEMP scr, USE_KILL box);
-
   ins_cost(300);
   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
-  ins_encode(Fast_Lock(object, box, tmp, scr));
+  ins_encode %{
+    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, _counters);
+  %}
   ins_pipe(pipe_slow);
 %}
 
-instruct cmpFastUnlock(rFlagsReg cr,
-                       rRegP object, rax_RegP box, rRegP tmp)
-%{
+instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
   match(Set cr (FastUnlock object box));
   effect(TEMP tmp, USE_KILL box);
-
   ins_cost(300);
   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
-  ins_encode(Fast_Unlock(object, box, tmp));
+  ins_encode %{
+    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
--- a/src/os/bsd/vm/os_bsd.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/os/bsd/vm/os_bsd.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -2636,9 +2636,21 @@
   }
 }
 
-int os::naked_sleep() {
-  // %% make the sleep time an integer flag. for now use 1 millisec.
-  return os::sleep(Thread::current(), 1, false);
+void os::naked_short_sleep(jlong ms) {
+  struct timespec req;
+
+  assert(ms < 1000, "Un-interruptable sleep, short time use only");
+  req.tv_sec = 0;
+  if (ms > 0) {
+    req.tv_nsec = (ms % 1000) * 1000000;
+  }
+  else {
+    req.tv_nsec = 1;
+  }
+
+  nanosleep(&req, NULL);
+
+  return;
 }
 
 // Sleep forever; naked call to OS-specific sleep; use with CAUTION
--- a/src/os/linux/vm/os_linux.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/os/linux/vm/os_linux.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -3871,9 +3871,33 @@
   }
 }
 
-int os::naked_sleep() {
-  // %% make the sleep time an integer flag. for now use 1 millisec.
-  return os::sleep(Thread::current(), 1, false);
+//
+// Short sleep, direct OS call.
+//
+// Note: certain versions of Linux CFS scheduler (since 2.6.23) do not guarantee
+// sched_yield(2) will actually give up the CPU:
+//
+//   * Alone on this particular CPU, keeps running.
+//   * Before the introduction of "skip_buddy" with "compat_yield" disabled
+//     (pre 2.6.39).
+//
+// So calling this with 0 is an alternative.
+//
+void os::naked_short_sleep(jlong ms) {
+  struct timespec req;
+
+  assert(ms < 1000, "Un-interruptable sleep, short time use only");
+  req.tv_sec = 0;
+  if (ms > 0) {
+    req.tv_nsec = (ms % 1000) * 1000000;
+  }
+  else {
+    req.tv_nsec = 1;
+  }
+
+  nanosleep(&req, NULL);
+
+  return;
 }
 
 // Sleep forever; naked call to OS-specific sleep; use with CAUTION
--- a/src/os/solaris/vm/os_solaris.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/os/solaris/vm/os_solaris.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2232,8 +2232,8 @@
         st->cr();
         status = true;
       }
-      ::close(fd);
     }
+    ::close(fd);
   }
   return status;
 }
@@ -2257,13 +2257,18 @@
                           "ILL_ILLTRP", "ILL_PRVOPC", "ILL_PRVREG",
                           "ILL_COPROC", "ILL_BADSTK" };
 
+const size_t ill_names_length = (sizeof(ill_names)/sizeof(char *));
+
 const char *fpe_names[] = { "FPE0", "FPE_INTDIV", "FPE_INTOVF", "FPE_FLTDIV",
                           "FPE_FLTOVF", "FPE_FLTUND", "FPE_FLTRES",
                           "FPE_FLTINV", "FPE_FLTSUB" };
+const size_t fpe_names_length = (sizeof(fpe_names)/sizeof(char *));
 
 const char *segv_names[] = { "SEGV0", "SEGV_MAPERR", "SEGV_ACCERR" };
+const size_t segv_names_length = (sizeof(segv_names)/sizeof(char *));
 
 const char *bus_names[] = { "BUS0", "BUS_ADRALN", "BUS_ADRERR", "BUS_OBJERR" };
+const size_t bus_names_length = (sizeof(bus_names)/sizeof(char *));
 
 void os::print_siginfo(outputStream* st, void* siginfo) {
   st->print("siginfo:");
@@ -2282,19 +2287,23 @@
   assert(c > 0, "unexpected si_code");
   switch (si->si_signo) {
   case SIGILL:
-    st->print(", si_code=%d (%s)", c, c > 8 ? "" : ill_names[c]);
+    st->print(", si_code=%d (%s)", c,
+      c >= ill_names_length ? "" : ill_names[c]);
     st->print(", si_addr=" PTR_FORMAT, si->si_addr);
     break;
   case SIGFPE:
-    st->print(", si_code=%d (%s)", c, c > 9 ? "" : fpe_names[c]);
+    st->print(", si_code=%d (%s)", c,
+      c >= fpe_names_length ? "" : fpe_names[c]);
     st->print(", si_addr=" PTR_FORMAT, si->si_addr);
     break;
   case SIGSEGV:
-    st->print(", si_code=%d (%s)", c, c > 2 ? "" : segv_names[c]);
+    st->print(", si_code=%d (%s)", c,
+      c >= segv_names_length ? "" : segv_names[c]);
     st->print(", si_addr=" PTR_FORMAT, si->si_addr);
     break;
   case SIGBUS:
-    st->print(", si_code=%d (%s)", c, c > 3 ? "" : bus_names[c]);
+    st->print(", si_code=%d (%s)", c,
+      c >= bus_names_length ? "" : bus_names[c]);
     st->print(", si_addr=" PTR_FORMAT, si->si_addr);
     break;
   default:
@@ -3011,7 +3020,7 @@
 char *os::scan_pages(char *start, char* end, page_info* page_expected, page_info* page_found) {
   const uint_t info_types[] = { MEMINFO_VLGRP, MEMINFO_VPAGESIZE };
   const size_t types = sizeof(info_types) / sizeof(info_types[0]);
-  uint64_t addrs[MAX_MEMINFO_CNT], outdata[types * MAX_MEMINFO_CNT];
+  uint64_t addrs[MAX_MEMINFO_CNT], outdata[types * MAX_MEMINFO_CNT + 1];
   uint_t validity[MAX_MEMINFO_CNT];
 
   size_t page_size = MAX2((size_t)os::vm_page_size(), page_expected->size);
@@ -3050,7 +3059,7 @@
       }
     }
 
-    if (i != addrs_count) {
+    if (i < addrs_count) {
       if ((validity[i] & 2) != 0) {
         page_found->lgrp_id = outdata[types * i];
       } else {
@@ -3540,9 +3549,14 @@
   return os_sleep(millis, interruptible);
 }
 
-int os::naked_sleep() {
-  // %% make the sleep time an integer flag. for now use 1 millisec.
-  return os_sleep(1, false);
+void os::naked_short_sleep(jlong ms) {
+  assert(ms < 1000, "Un-interruptable sleep, short time use only");
+
+  // usleep is deprecated and removed from POSIX, in favour of nanosleep, but
+  // Solaris requires -lrt for this.
+  usleep((ms * 1000));
+
+  return;
 }
 
 // Sleep forever; naked call to OS-specific sleep; use with CAUTION
--- a/src/os/solaris/vm/perfMemory_solaris.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/os/solaris/vm/perfMemory_solaris.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -431,10 +431,12 @@
 
       RESTARTABLE(::read(fd, addr, remaining), result);
       if (result == OS_ERR) {
+        ::close(fd);
         THROW_MSG_0(vmSymbols::java_io_IOException(), "Read error");
+      } else {
+        remaining-=result;
+        addr+=result;
       }
-      remaining-=result;
-      addr+=result;
     }
 
     ::close(fd);
@@ -906,8 +908,16 @@
   FREE_C_HEAP_ARRAY(char, filename, mtInternal);
 
   // open the shared memory file for the give vmid
-  fd = open_sharedmem_file(rfilename, file_flags, CHECK);
-  assert(fd != OS_ERR, "unexpected value");
+  fd = open_sharedmem_file(rfilename, file_flags, THREAD);
+
+  if (fd == OS_ERR) {
+    return;
+  }
+
+  if (HAS_PENDING_EXCEPTION) {
+    ::close(fd);
+    return;
+  }
 
   if (*sizep == 0) {
     size = sharedmem_filesize(fd, CHECK);
--- a/src/os/windows/vm/os_windows.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/os/windows/vm/os_windows.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -3496,6 +3496,16 @@
   return result;
 }
 
+//
+// Short sleep, direct OS call.
+//
+// ms = 0, means allow others (if any) to run.
+//
+void os::naked_short_sleep(jlong ms) {
+  assert(ms < 1000, "Un-interruptable sleep, short time use only");
+  Sleep(ms);
+}
+
 // Sleep forever; naked call to OS-specific sleep; use with CAUTION
 void os::infinite_sleep() {
   while (true) {    // sleep forever ...
@@ -3623,13 +3633,14 @@
          "possibility of dangling Thread pointer");
 
   OSThread* osthread = thread->osthread();
-  bool interrupted = osthread->interrupted();
   // There is no synchronization between the setting of the interrupt
   // and it being cleared here. It is critical - see 6535709 - that
   // we only clear the interrupt state, and reset the interrupt event,
   // if we are going to report that we were indeed interrupted - else
   // an interrupt can be "lost", leading to spurious wakeups or lost wakeups
-  // depending on the timing
+  // depending on the timing. We also poll the thread's interrupt event to
+  // confirm that a real interrupt was delivered, preventing spurious wakeups.
+  bool interrupted = osthread->interrupted() && (WaitForSingleObject(osthread->interrupt_event(), 0) == WAIT_OBJECT_0);
   if (interrupted && clear_interrupted) {
     osthread->set_interrupted(false);
     ResetEvent(osthread->interrupt_event());
--- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -475,9 +475,11 @@
         // here if the underlying file has been truncated.
         // Do not crash the VM in such a case.
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
-        nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL;
-        if (nm != NULL && nm->has_unsafe_access()) {
-          stub = StubRoutines::handler_for_unsafe_access();
+        if (cb != NULL) {
+          nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL;
+          if (nm != NULL && nm->has_unsafe_access()) {
+            stub = StubRoutines::handler_for_unsafe_access();
+          }
         }
       }
       else
@@ -724,6 +726,7 @@
   err.report_and_die();
 
   ShouldNotReachHere();
+  return false;
 }
 
 void os::print_context(outputStream *st, void *context) {
--- a/src/share/vm/classfile/altHashing.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/classfile/altHashing.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,18 +39,18 @@
 }
 
 // Seed value used for each alternative hash calculated.
-jint AltHashing::compute_seed() {
+juint AltHashing::compute_seed() {
   jlong nanos = os::javaTimeNanos();
   jlong now = os::javaTimeMillis();
-  jint SEED_MATERIAL[8] = {
-            (jint) object_hash(SystemDictionary::String_klass()),
-            (jint) object_hash(SystemDictionary::System_klass()),
-            (jint) os::random(),  // current thread isn't a java thread
-            (jint) (((julong)nanos) >> 32),
-            (jint) nanos,
-            (jint) (((julong)now) >> 32),
-            (jint) now,
-            (jint) (os::javaTimeNanos() >> 2)
+  int SEED_MATERIAL[8] = {
+            (int) object_hash(SystemDictionary::String_klass()),
+            (int) object_hash(SystemDictionary::System_klass()),
+            (int) os::random(),  // current thread isn't a java thread
+            (int) (((julong)nanos) >> 32),
+            (int) nanos,
+            (int) (((julong)now) >> 32),
+            (int) now,
+            (int) (os::javaTimeNanos() >> 2)
   };
 
   return murmur3_32(SEED_MATERIAL, 8);
@@ -58,14 +58,14 @@
 
 
 // Murmur3 hashing for Symbol
-jint AltHashing::murmur3_32(jint seed, const jbyte* data, int len) {
-  jint h1 = seed;
+juint AltHashing::murmur3_32(juint seed, const jbyte* data, int len) {
+  juint h1 = seed;
   int count = len;
   int offset = 0;
 
   // body
   while (count >= 4) {
-    jint k1 = (data[offset] & 0x0FF)
+    juint k1 = (data[offset] & 0x0FF)
         | (data[offset + 1] & 0x0FF) << 8
         | (data[offset + 2] & 0x0FF) << 16
         | data[offset + 3] << 24;
@@ -85,7 +85,7 @@
   // tail
 
   if (count > 0) {
-    jint k1 = 0;
+    juint k1 = 0;
 
     switch (count) {
       case 3:
@@ -109,18 +109,18 @@
   h1 ^= len;
 
   // finalization mix force all bits of a hash block to avalanche
-  h1 ^= ((unsigned int)h1) >> 16;
+  h1 ^= h1 >> 16;
   h1 *= 0x85ebca6b;
-  h1 ^= ((unsigned int)h1) >> 13;
+  h1 ^= h1 >> 13;
   h1 *= 0xc2b2ae35;
-  h1 ^= ((unsigned int)h1) >> 16;
+  h1 ^= h1 >> 16;
 
   return h1;
 }
 
 // Murmur3 hashing for Strings
-jint AltHashing::murmur3_32(jint seed, const jchar* data, int len) {
-  jint h1 = seed;
+juint AltHashing::murmur3_32(juint seed, const jchar* data, int len) {
+  juint h1 = seed;
 
   int off = 0;
   int count = len;
@@ -129,7 +129,7 @@
   while (count >= 2) {
     jchar d1 = data[off++] & 0xFFFF;
     jchar d2 = data[off++];
-    jint k1 = (d1 | d2 << 16);
+    juint k1 = (d1 | d2 << 16);
 
     count -= 2;
 
@@ -145,7 +145,7 @@
   // tail
 
   if (count > 0) {
-    int k1 = data[off];
+    juint k1 = (juint)data[off];
 
     k1 *= 0xcc9e2d51;
     k1 = Integer_rotateLeft(k1, 15);
@@ -157,25 +157,25 @@
   h1 ^= len * 2; // (Character.SIZE / Byte.SIZE);
 
   // finalization mix force all bits of a hash block to avalanche
-  h1 ^= ((unsigned int)h1) >> 16;
+  h1 ^= h1 >> 16;
   h1 *= 0x85ebca6b;
-  h1 ^= ((unsigned int)h1) >> 13;
+  h1 ^= h1 >> 13;
   h1 *= 0xc2b2ae35;
-  h1 ^= ((unsigned int)h1) >> 16;
+  h1 ^= h1 >> 16;
 
   return h1;
 }
 
 // Hash used for the seed.
-jint AltHashing::murmur3_32(jint seed, const int* data, int len) {
-  jint h1 = seed;
+juint AltHashing::murmur3_32(juint seed, const int* data, int len) {
+  juint h1 = seed;
 
   int off = 0;
   int end = len;
 
   // body
   while (off < end) {
-    jint k1 = data[off++];
+    juint k1 = (juint)data[off++];
 
     k1 *= 0xcc9e2d51;
     k1 = Integer_rotateLeft(k1, 15);
@@ -193,26 +193,26 @@
   h1 ^= len * 4; // (Integer.SIZE / Byte.SIZE);
 
   // finalization mix force all bits of a hash block to avalanche
-  h1 ^= ((juint)h1) >> 16;
+  h1 ^= h1 >> 16;
   h1 *= 0x85ebca6b;
-  h1 ^= ((juint)h1) >> 13;
+  h1 ^= h1 >> 13;
   h1 *= 0xc2b2ae35;
-  h1 ^= ((juint)h1) >> 16;
+  h1 ^= h1 >> 16;
 
   return h1;
 }
 
-jint AltHashing::murmur3_32(const int* data, int len) {
+juint AltHashing::murmur3_32(const int* data, int len) {
   return murmur3_32(0, data, len);
 }
 
 #ifndef PRODUCT
 // Overloaded versions for internal test.
-jint AltHashing::murmur3_32(const jbyte* data, int len) {
+juint AltHashing::murmur3_32(const jbyte* data, int len) {
   return murmur3_32(0, data, len);
 }
 
-jint AltHashing::murmur3_32(const jchar* data, int len) {
+juint AltHashing::murmur3_32(const jchar* data, int len) {
   return murmur3_32(0, data, len);
 }
 
@@ -251,11 +251,11 @@
 
   // Hash subranges {}, {0}, {0,1}, {0,1,2}, ..., {0,...,255}
   for (int i = 0; i < 256; i++) {
-    jint hash = murmur3_32(256 - i, vector, i);
+    juint hash = murmur3_32(256 - i, vector, i);
     hashes[i * 4] = (jbyte) hash;
-    hashes[i * 4 + 1] = (jbyte) (((juint)hash) >> 8);
-    hashes[i * 4 + 2] = (jbyte) (((juint)hash) >> 16);
-    hashes[i * 4 + 3] = (jbyte) (((juint)hash) >> 24);
+    hashes[i * 4 + 1] = (jbyte)(hash >> 8);
+    hashes[i * 4 + 2] = (jbyte)(hash >> 16);
+    hashes[i * 4 + 3] = (jbyte)(hash >> 24);
   }
 
   // hash to get const result.
@@ -269,7 +269,7 @@
 }
 
 void AltHashing::testEquivalentHashes() {
-  jint jbytes, jchars, ints;
+  juint jbytes, jchars, ints;
 
   // printf("testEquivalentHashes\n");
 
--- a/src/share/vm/classfile/altHashing.hpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/classfile/altHashing.hpp	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,24 +39,24 @@
 class AltHashing : AllStatic {
 
   // utility function copied from java/lang/Integer
-  static jint Integer_rotateLeft(jint i, int distance) {
-    return (i << distance) | (((juint)i) >> (32-distance));
+  static juint Integer_rotateLeft(juint i, int distance) {
+    return (i << distance) | (i >> (32-distance));
   }
-  static jint murmur3_32(const int* data, int len);
-  static jint murmur3_32(jint seed, const int* data, int len);
+  static juint murmur3_32(const int* data, int len);
+  static juint murmur3_32(juint seed, const int* data, int len);
 
 #ifndef PRODUCT
   // Hashing functions used for internal testing
-  static jint murmur3_32(const jbyte* data, int len);
-  static jint murmur3_32(const jchar* data, int len);
+  static juint murmur3_32(const jbyte* data, int len);
+  static juint murmur3_32(const jchar* data, int len);
   static void testMurmur3_32_ByteArray();
   static void testEquivalentHashes();
 #endif // PRODUCT
 
  public:
-  static jint compute_seed();
-  static jint murmur3_32(jint seed, const jbyte* data, int len);
-  static jint murmur3_32(jint seed, const jchar* data, int len);
+  static juint compute_seed();
+  static juint murmur3_32(juint seed, const jbyte* data, int len);
+  static juint murmur3_32(juint seed, const jchar* data, int len);
   NOT_PRODUCT(static void test_alt_hash();)
 };
 #endif // SHARE_VM_CLASSFILE_ALTHASHING_HPP
--- a/src/share/vm/oops/instanceKlass.hpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/oops/instanceKlass.hpp	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -554,6 +554,7 @@
     if (hk == NULL) {
       return NULL;
     } else {
+      assert(*hk != NULL, "host klass should always be set if the address is not null");
       return *hk;
     }
   }
--- a/src/share/vm/oops/metadata.hpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/oops/metadata.hpp	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,7 +40,7 @@
   int identity_hash()                { return (int)(uintptr_t)this; }
 
   // Rehashing support for tables containing pointers to this
-  unsigned int new_hash(jint seed)   { ShouldNotReachHere();  return 0; }
+  unsigned int new_hash(juint seed)   { ShouldNotReachHere();  return 0; }
 
   virtual bool is_klass()              const volatile { return false; }
   virtual bool is_method()             const volatile { return false; }
--- a/src/share/vm/oops/oop.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/oops/oop.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -102,7 +102,7 @@
 }
 
 // When String table needs to rehash
-unsigned int oopDesc::new_hash(jint seed) {
+unsigned int oopDesc::new_hash(juint seed) {
   EXCEPTION_MARK;
   ResourceMark rm;
   int length;
--- a/src/share/vm/oops/oop.hpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/oops/oop.hpp	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -362,7 +362,7 @@
   intptr_t slow_identity_hash();
 
   // Alternate hashing code if string table is rehashed
-  unsigned int new_hash(jint seed);
+  unsigned int new_hash(juint seed);
 
   // marks are forwarded to stack when object is locked
   bool     has_displaced_mark() const;
--- a/src/share/vm/oops/symbol.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/oops/symbol.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -207,7 +207,7 @@
 }
 
 // Alternate hashing for unbalanced symbol tables.
-unsigned int Symbol::new_hash(jint seed) {
+unsigned int Symbol::new_hash(juint seed) {
   ResourceMark rm;
   // Use alternate hashing algorithm on this symbol.
   return AltHashing::murmur3_32(seed, (const jbyte*)as_C_string(), utf8_length());
--- a/src/share/vm/oops/symbol.hpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/oops/symbol.hpp	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -154,7 +154,7 @@
   int identity_hash()       { return _identity_hash; }
 
   // For symbol table alternate hashing
-  unsigned int new_hash(jint seed);
+  unsigned int new_hash(juint seed);
 
   // Reference counting.  See comments above this class for when to use.
   int refcount() const      { return _refcount; }
--- a/src/share/vm/opto/graphKit.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/opto/graphKit.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -2994,22 +2994,28 @@
   }
 
   Node* cast_obj = NULL;
-  const TypeOopPtr* obj_type = _gvn.type(obj)->is_oopptr();
-  // We may not have profiling here or it may not help us. If we have
-  // a speculative type use it to perform an exact cast.
-  ciKlass* spec_obj_type = obj_type->speculative_type();
-  if (spec_obj_type != NULL ||
-      (data != NULL &&
-       // Counter has never been decremented (due to cast failure).
-       // ...This is a reasonable thing to expect.  It is true of
-       // all casts inserted by javac to implement generic types.
-       data->as_CounterData()->count() >= 0)) {
-    cast_obj = maybe_cast_profiled_receiver(not_null_obj, tk->klass(), spec_obj_type, safe_for_replace);
-    if (cast_obj != NULL) {
-      if (failure_control != NULL) // failure is now impossible
-        (*failure_control) = top();
-      // adjust the type of the phi to the exact klass:
-      phi->raise_bottom_type(_gvn.type(cast_obj)->meet_speculative(TypePtr::NULL_PTR));
+  if (tk->klass_is_exact()) {
+    // The following optimization tries to statically cast the speculative type of the object
+    // (for example obtained during profiling) to the type of the superklass and then do a
+    // dynamic check that the type of the object is what we expect. To work correctly
+    // for checkcast and aastore the type of superklass should be exact.
+    const TypeOopPtr* obj_type = _gvn.type(obj)->is_oopptr();
+    // We may not have profiling here or it may not help us. If we have
+    // a speculative type use it to perform an exact cast.
+    ciKlass* spec_obj_type = obj_type->speculative_type();
+    if (spec_obj_type != NULL ||
+        (data != NULL &&
+         // Counter has never been decremented (due to cast failure).
+         // ...This is a reasonable thing to expect.  It is true of
+         // all casts inserted by javac to implement generic types.
+         data->as_CounterData()->count() >= 0)) {
+      cast_obj = maybe_cast_profiled_receiver(not_null_obj, tk->klass(), spec_obj_type, safe_for_replace);
+      if (cast_obj != NULL) {
+        if (failure_control != NULL) // failure is now impossible
+          (*failure_control) = top();
+        // adjust the type of the phi to the exact klass:
+        phi->raise_bottom_type(_gvn.type(cast_obj)->meet_speculative(TypePtr::NULL_PTR));
+      }
     }
   }
 
--- a/src/share/vm/opto/library_call.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/opto/library_call.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -3237,7 +3237,8 @@
 // private native boolean java.lang.Thread.isInterrupted(boolean ClearInterrupted);
 bool LibraryCallKit::inline_native_isInterrupted() {
   // Add a fast path to t.isInterrupted(clear_int):
-  //   (t == Thread.current() && (!TLS._osthread._interrupted || !clear_int))
+  //   (t == Thread.current() &&
+  //    (!TLS._osthread._interrupted || WINDOWS_ONLY(false) NOT_WINDOWS(!clear_int)))
   //   ? TLS._osthread._interrupted : /*slow path:*/ t.isInterrupted(clear_int)
   // So, in the common case that the interrupt bit is false,
   // we avoid making a call into the VM.  Even if the interrupt bit
@@ -3294,6 +3295,7 @@
   // drop through to next case
   set_control( _gvn.transform(new (C) IfTrueNode(iff_bit)));
 
+#ifndef TARGET_OS_FAMILY_windows
   // (c) Or, if interrupt bit is set and clear_int is false, use 2nd fast path.
   Node* clr_arg = argument(1);
   Node* cmp_arg = _gvn.transform(new (C) CmpINode(clr_arg, intcon(0)));
@@ -3307,6 +3309,10 @@
 
   // drop through to next case
   set_control( _gvn.transform(new (C) IfTrueNode(iff_arg)));
+#else
+  // To return true on Windows you must read the _interrupted field
+  // and check the event state i.e. take the slow path.
+#endif // TARGET_OS_FAMILY_windows
 
   // (d) Otherwise, go to the slow path.
   slow_region->add_req(control());
--- a/src/share/vm/prims/jni.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/prims/jni.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -4450,8 +4450,23 @@
 
     // Get needed field and method IDs
     directByteBufferConstructor = env->GetMethodID(directByteBufferClass, "<init>", "(JI)V");
+    if (env->ExceptionCheck()) {
+      env->ExceptionClear();
+      directBufferSupportInitializeFailed = 1;
+      return false;
+    }
     directBufferAddressField    = env->GetFieldID(bufferClass, "address", "J");
+    if (env->ExceptionCheck()) {
+      env->ExceptionClear();
+      directBufferSupportInitializeFailed = 1;
+      return false;
+    }
     bufferCapacityField         = env->GetFieldID(bufferClass, "capacity", "I");
+    if (env->ExceptionCheck()) {
+      env->ExceptionClear();
+      directBufferSupportInitializeFailed = 1;
+      return false;
+    }
 
     if ((directByteBufferConstructor == NULL) ||
         (directBufferAddressField    == NULL) ||
--- a/src/share/vm/prims/unsafe.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/prims/unsafe.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -858,6 +858,11 @@
   strcpy(buf, "java/lang/");
   strcat(buf, ename);
   jclass cls = env->FindClass(buf);
+  if (env->ExceptionCheck()) {
+    env->ExceptionClear();
+    tty->print_cr("Unsafe: cannot throw %s because FindClass has failed", buf);
+    return;
+  }
   char* msg = NULL;
   env->ThrowNew(cls, msg);
 }
--- a/src/share/vm/prims/whitebox.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/prims/whitebox.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -316,9 +316,10 @@
 
 WB_ENTRY(jint, WB_DeoptimizeMethod(JNIEnv* env, jobject o, jobject method, jboolean is_osr))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  int result = 0;
+  CHECK_JNI_EXCEPTION_(env, result);
   MutexLockerEx mu(Compile_lock);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
-  int result = 0;
   nmethod* code;
   if (is_osr) {
     int bci = InvocationEntryBci;
@@ -344,6 +345,7 @@
 
 WB_ENTRY(jboolean, WB_IsMethodCompiled(JNIEnv* env, jobject o, jobject method, jboolean is_osr))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
   MutexLockerEx mu(Compile_lock);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   nmethod* code = is_osr ? mh->lookup_osr_nmethod_for(InvocationEntryBci, CompLevel_none, false) : mh->code();
@@ -355,6 +357,7 @@
 
 WB_ENTRY(jboolean, WB_IsMethodCompilable(JNIEnv* env, jobject o, jobject method, jint comp_level, jboolean is_osr))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
   MutexLockerEx mu(Compile_lock);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   if (is_osr) {
@@ -366,6 +369,7 @@
 
 WB_ENTRY(jboolean, WB_IsMethodQueuedForCompilation(JNIEnv* env, jobject o, jobject method))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
   MutexLockerEx mu(Compile_lock);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   return mh->queued_for_compilation();
@@ -373,6 +377,7 @@
 
 WB_ENTRY(jint, WB_GetMethodCompilationLevel(JNIEnv* env, jobject o, jobject method, jboolean is_osr))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, CompLevel_none);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   nmethod* code = is_osr ? mh->lookup_osr_nmethod_for(InvocationEntryBci, CompLevel_none, false) : mh->code();
   return (code != NULL ? code->comp_level() : CompLevel_none);
@@ -380,6 +385,7 @@
 
 WB_ENTRY(void, WB_MakeMethodNotCompilable(JNIEnv* env, jobject o, jobject method, jint comp_level, jboolean is_osr))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION(env);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   if (is_osr) {
     mh->set_not_osr_compilable(comp_level, true /* report */, "WhiteBox");
@@ -390,6 +396,7 @@
 
 WB_ENTRY(jint, WB_GetMethodEntryBci(JNIEnv* env, jobject o, jobject method))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, InvocationEntryBci);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   nmethod* code = mh->lookup_osr_nmethod_for(InvocationEntryBci, CompLevel_none, false);
   return (code != NULL && code->is_osr_method() ? code->osr_entry_bci() : InvocationEntryBci);
@@ -397,6 +404,7 @@
 
 WB_ENTRY(jboolean, WB_TestSetDontInlineMethod(JNIEnv* env, jobject o, jobject method, jboolean value))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   bool result = mh->dont_inline();
   mh->set_dont_inline(value == JNI_TRUE);
@@ -414,6 +422,7 @@
 
 WB_ENTRY(jboolean, WB_TestSetForceInlineMethod(JNIEnv* env, jobject o, jobject method, jboolean value))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   bool result = mh->force_inline();
   mh->set_force_inline(value == JNI_TRUE);
@@ -422,6 +431,7 @@
 
 WB_ENTRY(jboolean, WB_EnqueueMethodForCompilation(JNIEnv* env, jobject o, jobject method, jint comp_level, jint bci))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   nmethod* nm = CompileBroker::compile_method(mh, bci, comp_level, mh, mh->invocation_count(), "WhiteBox", THREAD);
   MutexLockerEx mu(Compile_lock);
@@ -430,6 +440,7 @@
 
 WB_ENTRY(void, WB_ClearMethodState(JNIEnv* env, jobject o, jobject method))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION(env);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   MutexLockerEx mu(Compile_lock);
   MethodData* mdo = mh->method_data();
@@ -616,14 +627,18 @@
         bool result = true;
         //  one by one registration natives for exception catching
         jclass exceptionKlass = env->FindClass(vmSymbols::java_lang_NoSuchMethodError()->as_C_string());
+        CHECK_JNI_EXCEPTION(env);
         for (int i = 0, n = sizeof(methods) / sizeof(methods[0]); i < n; ++i) {
           if (env->RegisterNatives(wbclass, methods + i, 1) != 0) {
             result = false;
-            if (env->ExceptionCheck() && env->IsInstanceOf(env->ExceptionOccurred(), exceptionKlass)) {
-              // j.l.NoSuchMethodError is thrown when a method can't be found or a method is not native
-              // ignoring the exception
-              tty->print_cr("Warning: 'NoSuchMethodError' on register of sun.hotspot.WhiteBox::%s%s", methods[i].name, methods[i].signature);
+            jthrowable throwable_obj = env->ExceptionOccurred();
+            if (throwable_obj != NULL) {
               env->ExceptionClear();
+              if (env->IsInstanceOf(throwable_obj, exceptionKlass)) {
+                // j.l.NoSuchMethodError is thrown when a method can't be found or a method is not native
+                // ignoring the exception
+                tty->print_cr("Warning: 'NoSuchMethodError' on register of sun.hotspot.WhiteBox::%s%s", methods[i].name, methods[i].signature);
+              }
             } else {
               // register is failed w/o exception or w/ unexpected exception
               tty->print_cr("Warning: unexpected error on register of sun.hotspot.WhiteBox::%s%s. All methods will be unregistered", methods[i].name, methods[i].signature);
--- a/src/share/vm/prims/whitebox.hpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/prims/whitebox.hpp	Thu Mar 13 09:57:31 2014 -0700
@@ -36,6 +36,24 @@
 #define WB_END JNI_END
 #define WB_METHOD_DECLARE(result_type) extern "C" result_type JNICALL
 
+#define CHECK_JNI_EXCEPTION_(env, value)                               \
+  do {                                                                 \
+    JavaThread* THREAD = JavaThread::thread_from_jni_environment(env); \
+    if (HAS_PENDING_EXCEPTION) {                                       \
+      CLEAR_PENDING_EXCEPTION;                                         \
+      return(value);                                                   \
+    }                                                                  \
+  } while (0)
+
+#define CHECK_JNI_EXCEPTION(env)                                       \
+  do {                                                                 \
+    JavaThread* THREAD = JavaThread::thread_from_jni_environment(env); \
+    if (HAS_PENDING_EXCEPTION) {                                       \
+      CLEAR_PENDING_EXCEPTION;                                         \
+      return;                                                          \
+    }                                                                  \
+  } while (0)
+
 class WhiteBox : public AllStatic {
  private:
   static bool _used;
--- a/src/share/vm/runtime/os.hpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/runtime/os.hpp	Thu Mar 13 09:57:31 2014 -0700
@@ -430,7 +430,10 @@
   static intx current_thread_id();
   static int current_process_id();
   static int sleep(Thread* thread, jlong ms, bool interruptable);
-  static int naked_sleep();
+  // Short standalone OS sleep suitable for slow path spin loop.
+  // Ignores Thread.interrupt() (so keep it short).
+  // ms = 0, will sleep for the least amount of time allowed by the OS.
+  static void naked_short_sleep(jlong ms);
   static void infinite_sleep(); // never returns, use with CAUTION
   static void yield();        // Yields to all threads with same priority
   enum YieldResult {
--- a/src/share/vm/runtime/park.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/runtime/park.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -59,58 +59,22 @@
 
   // Start by trying to recycle an existing but unassociated
   // ParkEvent from the global free list.
-  for (;;) {
-    ev = FreeList ;
-    if (ev == NULL) break ;
-    // 1: Detach - sequester or privatize the list
-    // Tantamount to ev = Swap (&FreeList, NULL)
-    if (Atomic::cmpxchg_ptr (NULL, &FreeList, ev) != ev) {
-       continue ;
+  // Using a spin lock since we are part of the mutex impl.
+  // 8028280: using concurrent free list without memory management can leak
+  // pretty badly it turns out.
+  Thread::SpinAcquire(&ListLock, "ParkEventFreeListAllocate");
+  {
+    ev = FreeList;
+    if (ev != NULL) {
+      FreeList = ev->FreeNext;
     }
-
-    // We've detached the list.  The list in-hand is now
-    // local to this thread.   This thread can operate on the
-    // list without risk of interference from other threads.
-    // 2: Extract -- pop the 1st element from the list.
-    ParkEvent * List = ev->FreeNext ;
-    if (List == NULL) break ;
-    for (;;) {
-        // 3: Try to reattach the residual list
-        guarantee (List != NULL, "invariant") ;
-        ParkEvent * Arv =  (ParkEvent *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ;
-        if (Arv == NULL) break ;
-
-        // New nodes arrived.  Try to detach the recent arrivals.
-        if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) {
-            continue ;
-        }
-        guarantee (Arv != NULL, "invariant") ;
-        // 4: Merge Arv into List
-        ParkEvent * Tail = List ;
-        while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ;
-        Tail->FreeNext = Arv ;
-    }
-    break ;
   }
+  Thread::SpinRelease(&ListLock);
 
   if (ev != NULL) {
     guarantee (ev->AssociatedWith == NULL, "invariant") ;
   } else {
     // Do this the hard way -- materialize a new ParkEvent.
-    // In rare cases an allocating thread might detach a long list --
-    // installing null into FreeList -- and then stall or be obstructed.
-    // A 2nd thread calling Allocate() would see FreeList == null.
-    // The list held privately by the 1st thread is unavailable to the 2nd thread.
-    // In that case the 2nd thread would have to materialize a new ParkEvent,
-    // even though free ParkEvents existed in the system.  In this case we end up
-    // with more ParkEvents in circulation than we need, but the race is
-    // rare and the outcome is benign.  Ideally, the # of extant ParkEvents
-    // is equal to the maximum # of threads that existed at any one time.
-    // Because of the race mentioned above, segments of the freelist
-    // can be transiently inaccessible.  At worst we may end up with the
-    // # of ParkEvents in circulation slightly above the ideal.
-    // Note that if we didn't have the TSM/immortal constraint, then
-    // when reattaching, above, we could trim the list.
     ev = new ParkEvent () ;
     guarantee ((intptr_t(ev) & 0xFF) == 0, "invariant") ;
   }
@@ -124,13 +88,14 @@
   if (ev == NULL) return ;
   guarantee (ev->FreeNext == NULL      , "invariant") ;
   ev->AssociatedWith = NULL ;
-  for (;;) {
-    // Push ev onto FreeList
-    // The mechanism is "half" lock-free.
-    ParkEvent * List = FreeList ;
-    ev->FreeNext = List ;
-    if (Atomic::cmpxchg_ptr (ev, &FreeList, List) == List) break ;
+  // Note that if we didn't have the TSM/immortal constraint, then
+  // when reattaching we could trim the list.
+  Thread::SpinAcquire(&ListLock, "ParkEventFreeListRelease");
+  {
+    ev->FreeNext = FreeList;
+    FreeList = ev;
   }
+  Thread::SpinRelease(&ListLock);
 }
 
 // Override operator new and delete so we can ensure that the
@@ -164,56 +129,21 @@
 
   // Start by trying to recycle an existing but unassociated
   // Parker from the global free list.
-  for (;;) {
-    p = FreeList ;
-    if (p  == NULL) break ;
-    // 1: Detach
-    // Tantamount to p = Swap (&FreeList, NULL)
-    if (Atomic::cmpxchg_ptr (NULL, &FreeList, p) != p) {
-       continue ;
+  // 8028280: using concurrent free list without memory management can leak
+  // pretty badly it turns out.
+  Thread::SpinAcquire(&ListLock, "ParkerFreeListAllocate");
+  {
+    p = FreeList;
+    if (p != NULL) {
+      FreeList = p->FreeNext;
     }
-
-    // We've detached the list.  The list in-hand is now
-    // local to this thread.   This thread can operate on the
-    // list without risk of interference from other threads.
-    // 2: Extract -- pop the 1st element from the list.
-    Parker * List = p->FreeNext ;
-    if (List == NULL) break ;
-    for (;;) {
-        // 3: Try to reattach the residual list
-        guarantee (List != NULL, "invariant") ;
-        Parker * Arv =  (Parker *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ;
-        if (Arv == NULL) break ;
-
-        // New nodes arrived.  Try to detach the recent arrivals.
-        if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) {
-            continue ;
-        }
-        guarantee (Arv != NULL, "invariant") ;
-        // 4: Merge Arv into List
-        Parker * Tail = List ;
-        while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ;
-        Tail->FreeNext = Arv ;
-    }
-    break ;
   }
+  Thread::SpinRelease(&ListLock);
 
   if (p != NULL) {
     guarantee (p->AssociatedWith == NULL, "invariant") ;
   } else {
     // Do this the hard way -- materialize a new Parker..
-    // In rare cases an allocating thread might detach
-    // a long list -- installing null into FreeList --and
-    // then stall.  Another thread calling Allocate() would see
-    // FreeList == null and then invoke the ctor.  In this case we
-    // end up with more Parkers in circulation than we need, but
-    // the race is rare and the outcome is benign.
-    // Ideally, the # of extant Parkers is equal to the
-    // maximum # of threads that existed at any one time.
-    // Because of the race mentioned above, segments of the
-    // freelist can be transiently inaccessible.  At worst
-    // we may end up with the # of Parkers in circulation
-    // slightly above the ideal.
     p = new Parker() ;
   }
   p->AssociatedWith = t ;          // Associate p with t
@@ -227,11 +157,12 @@
   guarantee (p->AssociatedWith != NULL, "invariant") ;
   guarantee (p->FreeNext == NULL      , "invariant") ;
   p->AssociatedWith = NULL ;
-  for (;;) {
-    // Push p onto FreeList
-    Parker * List = FreeList ;
-    p->FreeNext = List ;
-    if (Atomic::cmpxchg_ptr (p, &FreeList, List) == List) break ;
+
+  Thread::SpinAcquire(&ListLock, "ParkerFreeListRelease");
+  {
+    p->FreeNext = FreeList;
+    FreeList = p;
   }
+  Thread::SpinRelease(&ListLock);
 }
 
--- a/src/share/vm/runtime/thread.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/runtime/thread.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -4446,9 +4446,7 @@
         ++ctr ;
         if ((ctr & 0xFFF) == 0 || !os::is_MP()) {
            if (Yields > 5) {
-             // Consider using a simple NakedSleep() instead.
-             // Then SpinAcquire could be called by non-JVM threads
-             Thread::current()->_ParkEvent->park(1) ;
+             os::naked_short_sleep(1);
            } else {
              os::NakedYield() ;
              ++Yields ;
--- a/src/share/vm/utilities/hashtable.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/utilities/hashtable.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -93,7 +93,7 @@
   return false;
 }
 
-template <class T, MEMFLAGS F> jint Hashtable<T, F>::_seed = 0;
+template <class T, MEMFLAGS F> juint Hashtable<T, F>::_seed = 0;
 
 // Create a new table and using alternate hash code, populate the new table
 // with the existing elements.   This can be used to change the hash code
--- a/src/share/vm/utilities/hashtable.hpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/utilities/hashtable.hpp	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -280,7 +280,7 @@
   // Function to move these elements into the new table.
   void move_to(Hashtable<T, F>* new_table);
   static bool use_alternate_hashcode()  { return _seed != 0; }
-  static jint seed()                    { return _seed; }
+  static juint seed()                    { return _seed; }
 
   static int literal_size(Symbol *symbol);
   static int literal_size(oop oop);
@@ -296,7 +296,7 @@
   void dump_table(outputStream* st, const char *table_name);
 
  private:
-  static jint _seed;
+  static juint _seed;
 };
 
 
--- a/src/share/vm/utilities/vmError.cpp	Wed Mar 12 14:10:31 2014 -0700
+++ b/src/share/vm/utilities/vmError.cpp	Thu Mar 13 09:57:31 2014 -0700
@@ -592,13 +592,24 @@
              st->cr();
              // Compiled code may use EBP register on x86 so it looks like
              // non-walkable C frame. Use frame.sender() for java frames.
-             if (_thread && _thread->is_Java_thread() && fr.is_java_frame()) {
-               RegisterMap map((JavaThread*)_thread, false); // No update
-               fr = fr.sender(&map);
-               continue;
+             if (_thread && _thread->is_Java_thread()) {
+               // Catch very first native frame by using stack address.
+               // For JavaThread stack_base and stack_size should be set.
+               if (!_thread->on_local_stack((address)(fr.sender_sp() + 1))) {
+                 break;
+               }
+               if (fr.is_java_frame()) {
+                 RegisterMap map((JavaThread*)_thread, false); // No update
+                 fr = fr.sender(&map);
+               } else {
+                 fr = os::get_sender_for_C_frame(&fr);
+               }
+             } else {
+               // is_first_C_frame() does only simple checks for frame pointer,
+               // it will pass if java compiled code has a pointer in EBP.
+               if (os::is_first_C_frame(&fr)) break;
+               fr = os::get_sender_for_C_frame(&fr);
              }
-             if (os::is_first_C_frame(&fr)) break;
-             fr = os::get_sender_for_C_frame(&fr);
           }
 
           if (count > StackPrintLimit) {
--- a/test/TEST.groups	Wed Mar 12 14:10:31 2014 -0700
+++ b/test/TEST.groups	Thu Mar 13 09:57:31 2014 -0700
@@ -131,7 +131,9 @@
   gc/arguments/TestG1HeapRegionSize.java \
   gc/metaspace/TestMetaspaceMemoryPool.java \
   runtime/InternalApi/ThreadCpuTimesDeadlock.java \
-  serviceability/threads/TestFalseDeadLock.java
+  serviceability/threads/TestFalseDeadLock.java \
+  compiler/tiered/NonTieredLevelsTest.java \
+  compiler/tiered/TieredLevelsTest.java
 
 # Compact 2 adds full VM tests
 compact2 = \
--- a/test/compiler/ciReplay/TestVM.sh	Wed Mar 12 14:10:31 2014 -0700
+++ b/test/compiler/ciReplay/TestVM.sh	Thu Mar 13 09:57:31 2014 -0700
@@ -78,8 +78,8 @@
         positive_test `expr $stop_level + 50` "TIERED LEVEL $stop_level :: REPLAY" \
                 "-XX:TieredStopAtLevel=$stop_level"
         stop_level=`expr $stop_level + 1`
+        cleanup
     done
-    cleanup
 fi
 
 echo TEST PASSED
--- a/test/compiler/ciReplay/common.sh	Wed Mar 12 14:10:31 2014 -0700
+++ b/test/compiler/ciReplay/common.sh	Thu Mar 13 09:57:31 2014 -0700
@@ -99,14 +99,13 @@
 # $2 - non-tiered comp_level 
 nontiered_tests() {
     level=`grep "^compile " $replay_data | awk '{print $6}'`
-    # is level available in non-tiere
+    # is level available in non-tiered
     if [ "$level" -eq $2 ]
     then
         positive_test $1 "NON-TIERED :: AVAILABLE COMP_LEVEL" \
                 -XX:-TieredCompilation
     else
         negative_test `expr $1 + 1` "NON-TIERED :: UNAVAILABLE COMP_LEVEL" \
-        negative_test `expr $1 + 1` "NON-TIERED :: UNAVAILABLE COMP_LEVEL" \
                 -XX:-TieredCompilation
     fi
 }
--- a/test/compiler/tiered/NonTieredLevelsTest.java	Wed Mar 12 14:10:31 2014 -0700
+++ b/test/compiler/tiered/NonTieredLevelsTest.java	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -70,6 +70,9 @@
 
     @Override
     protected void test() throws Exception {
+        if (skipXcompOSR()) {
+          return;
+        }
         checkNotCompiled();
         compile();
         checkCompiled();
--- a/test/compiler/tiered/TieredLevelsTest.java	Wed Mar 12 14:10:31 2014 -0700
+++ b/test/compiler/tiered/TieredLevelsTest.java	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,6 +51,9 @@
 
     @Override
     protected void test() throws Exception {
+        if (skipXcompOSR()) {
+          return;
+        }
         checkNotCompiled();
         compile();
         checkCompiled();
--- a/test/compiler/whitebox/CompilerWhiteBoxTest.java	Wed Mar 12 14:10:31 2014 -0700
+++ b/test/compiler/whitebox/CompilerWhiteBoxTest.java	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -380,6 +380,20 @@
         /** flag for OSR test case */
         boolean isOsr();
     }
+
+    /**
+     * @return {@code true} if the current test case is OSR and the mode is
+     *          Xcomp, otherwise {@code false}
+     */
+    protected boolean skipXcompOSR() {
+        boolean result =  testCase.isOsr()
+                && CompilerWhiteBoxTest.MODE.startsWith("compiled ");
+        if (result && IS_VERBOSE) {
+            System.err.printf("Warning: %s is not applicable in %s%n",
+                    testCase.name(), CompilerWhiteBoxTest.MODE);
+        }
+        return result;
+    }
 }
 
 enum SimpleTestCase implements CompilerWhiteBoxTest.TestCase {
--- a/test/compiler/whitebox/DeoptimizeAllTest.java	Wed Mar 12 14:10:31 2014 -0700
+++ b/test/compiler/whitebox/DeoptimizeAllTest.java	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,11 +51,8 @@
      */
     @Override
     protected void test() throws Exception {
-        if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith(
-                "compiled ")) {
-          System.err.printf("Warning: %s is not applicable in %s%n",
-                testCase.name(), CompilerWhiteBoxTest.MODE);
-          return;
+        if (skipXcompOSR()) {
+            return;
         }
         compile();
         checkCompiled();
--- a/test/compiler/whitebox/DeoptimizeMethodTest.java	Wed Mar 12 14:10:31 2014 -0700
+++ b/test/compiler/whitebox/DeoptimizeMethodTest.java	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,11 +51,8 @@
      */
     @Override
     protected void test() throws Exception {
-        if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith(
-                "compiled ")) {
-          System.err.printf("Warning: %s is not applicable in %s%n",
-                testCase.name(), CompilerWhiteBoxTest.MODE);
-          return;
+        if (skipXcompOSR()) {
+            return;
         }
         compile();
         checkCompiled();
--- a/test/compiler/whitebox/IsMethodCompilableTest.java	Wed Mar 12 14:10:31 2014 -0700
+++ b/test/compiler/whitebox/IsMethodCompilableTest.java	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -66,10 +66,7 @@
      */
     @Override
     protected void test() throws Exception {
-        if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith(
-                "compiled ")) {
-          System.err.printf("Warning: %s is not applicable in %s%n",
-                testCase.name(), CompilerWhiteBoxTest.MODE);
+        if (skipXcompOSR()) {
           return;
         }
         if (!isCompilable()) {
--- a/test/compiler/whitebox/MakeMethodNotCompilableTest.java	Wed Mar 12 14:10:31 2014 -0700
+++ b/test/compiler/whitebox/MakeMethodNotCompilableTest.java	Thu Mar 13 09:57:31 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -53,11 +53,8 @@
      */
     @Override
     protected void test() throws Exception {
-        if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith(
-                "compiled ")) {
-          System.err.printf("Warning: %s is not applicable in %s%n",
-                testCase.name(), CompilerWhiteBoxTest.MODE);
-          return;
+        if (skipXcompOSR()) {
+            return;
         }
         checkNotCompiled();
         if (!isCompilable()) {