Mercurial > hg > graal-compiler

--- a/.hgtags	Tue Mar 25 12:54:21 2014 -0700
+++ b/.hgtags	Tue Mar 25 17:07:36 2014 -0700
@@ -432,3 +432,8 @@
 ecf3678d5736a645aea893b525a9eb5fa1a8e072 hs25.20-b04
 51e1bb81df8680bd237630323de5e0704fb25607 jdk8u20-b03
 54436d3b2a915ff50a8d6b34f61d5afb45be7bb6 hs25.20-b05
+d4e18f0633c662588cc0875be7759721c7d85af4 jdk8u20-b04
+57eb3e69397e9d5818c5fdaef65b47d9b03f7f88 jdk8u20-b05
+804f89b6ff46728d60a69e9a338e63f362f7ac68 hs25.20-b06
+c3d92e04873788275eeebec6bcd2948cdbd143a7 jdk8u20-b06
+39eae002499704438142e78f5e0e24d46d0b266f hs25.20-b07
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java	Tue Mar 25 17:07:36 2014 -0700
@@ -51,9 +51,9 @@
     static private CIntegerField summaryBytesUsedField;
     // G1MonitoringSupport* _g1mm;
     static private AddressField g1mmField;
-    // MasterOldRegionSet _old_set;
+    // HeapRegionSet _old_set;
     static private long oldSetFieldOffset;
-    // MasterHumongousRegionSet _humongous_set;
+    // HeapRegionSet _humongous_set;
     static private long humongousSetFieldOffset;

     static {
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetBase.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetBase.java	Tue Mar 25 17:07:36 2014 -0700
@@ -40,12 +40,8 @@
 // Mirror class for HeapRegionSetBase. Represents a group of regions.

 public class HeapRegionSetBase extends VMObject {
-    // uint _length;
-    static private CIntegerField lengthField;
-    // uint _region_num;
-    static private CIntegerField regionNumField;
-    // size_t _total_used_bytes;
-    static private CIntegerField totalUsedBytesField;
+
+    static private long countField;

     static {
         VM.registerVMInitializedObserver(new Observer() {
@@ -58,21 +54,13 @@
     static private synchronized void initialize(TypeDataBase db) {
         Type type = db.lookupType("HeapRegionSetBase");

-        lengthField         = type.getCIntegerField("_length");
-        regionNumField      = type.getCIntegerField("_region_num");
-        totalUsedBytesField = type.getCIntegerField("_total_used_bytes");
+        countField = type.getField("_count").getOffset();
     }

-    public long length() {
-        return lengthField.getValue(addr);
-    }

-    public long regionNum() {
-        return regionNumField.getValue(addr);
-    }
-
-    public long totalUsedBytes() {
-        return totalUsedBytesField.getValue(addr);
+    public HeapRegionSetCount count() {
+        Address countFieldAddr = addr.addOffsetTo(countField);
+        return (HeapRegionSetCount) VMObjectFactory.newObject(HeapRegionSetCount.class, countFieldAddr);
     }

     public HeapRegionSetBase(Address addr) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetCount.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.gc_implementation.g1;
+
+import java.util.Iterator;
+import java.util.Observable;
+import java.util.Observer;
+
+import sun.jvm.hotspot.debugger.Address;
+import sun.jvm.hotspot.runtime.VM;
+import sun.jvm.hotspot.runtime.VMObject;
+import sun.jvm.hotspot.runtime.VMObjectFactory;
+import sun.jvm.hotspot.types.AddressField;
+import sun.jvm.hotspot.types.CIntegerField;
+import sun.jvm.hotspot.types.Type;
+import sun.jvm.hotspot.types.TypeDataBase;
+
+// Mirror class for HeapRegionSetCount. Represents a group of regions.
+
+public class HeapRegionSetCount extends VMObject {
+
+    static private CIntegerField lengthField;
+    static private CIntegerField capacityField;
+
+    static {
+        VM.registerVMInitializedObserver(new Observer() {
+                public void update(Observable o, Object data) {
+                    initialize(VM.getVM().getTypeDataBase());
+                }
+            });
+    }
+
+    static private synchronized void initialize(TypeDataBase db) {
+        Type type = db.lookupType("HeapRegionSetCount");
+
+        lengthField   = type.getCIntegerField("_length");
+        capacityField = type.getCIntegerField("_capacity");
+    }
+
+    public long length() {
+        return lengthField.getValue(addr);
+    }
+
+    public long capacity() {
+        return capacityField.getValue(addr);
+    }
+
+    public HeapRegionSetCount(Address addr) {
+        super(addr);
+    }
+}
--- a/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java	Tue Mar 25 17:07:36 2014 -0700
@@ -114,7 +114,8 @@
              long survivorRegionNum = g1mm.survivorRegionNum();
              HeapRegionSetBase oldSet = g1h.oldSet();
              HeapRegionSetBase humongousSet = g1h.humongousSet();
-             long oldRegionNum = oldSet.regionNum() + humongousSet.regionNum();
+             long oldRegionNum = oldSet.count().length()
+                          + humongousSet.count().capacity() / HeapRegion.grainBytes();
              printG1Space("G1 Heap:", g1h.n_regions(),
                           g1h.used(), g1h.capacity());
              System.out.println("G1 Young Generation:");
--- a/make/excludeSrc.make	Tue Mar 25 12:54:21 2014 -0700
+++ b/make/excludeSrc.make	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -87,9 +87,10 @@
 	g1BlockOffsetTable.cpp g1CardCounts.cpp g1CollectedHeap.cpp g1CollectorPolicy.cpp \
 	g1ErgoVerbose.cpp g1GCPhaseTimes.cpp g1HRPrinter.cpp g1HotCardCache.cpp g1Log.cpp \
 	g1MMUTracker.cpp g1MarkSweep.cpp g1MemoryPool.cpp g1MonitoringSupport.cpp g1OopClosures.cpp \
-	g1RemSet.cpp g1RemSetSummary.cpp g1SATBCardTableModRefBS.cpp g1_globals.cpp heapRegion.cpp \
+	g1RemSet.cpp g1RemSetSummary.cpp g1SATBCardTableModRefBS.cpp g1StringDedup.cpp g1StringDedupStat.cpp \
+	g1StringDedupTable.cpp g1StringDedupThread.cpp g1StringDedupQueue.cpp g1_globals.cpp heapRegion.cpp \
 	g1BiasedArray.cpp heapRegionRemSet.cpp heapRegionSeq.cpp heapRegionSet.cpp heapRegionSets.cpp \
-	ptrQueue.cpp satbQueue.cpp sparsePRT.cpp survRateGroup.cpp vm_operations_g1.cpp \
+	ptrQueue.cpp satbQueue.cpp sparsePRT.cpp survRateGroup.cpp vm_operations_g1.cpp g1CodeCacheRemSet.cpp \
 	adjoiningGenerations.cpp adjoiningVirtualSpaces.cpp asPSOldGen.cpp asPSYoungGen.cpp \
 	cardTableExtension.cpp gcTaskManager.cpp gcTaskThread.cpp objectStartArray.cpp \
 	parallelScavengeHeap.cpp parMarkBitMap.cpp pcTasks.cpp psAdaptiveSizePolicy.cpp \
--- a/make/hotspot_version	Tue Mar 25 12:54:21 2014 -0700
+++ b/make/hotspot_version	Tue Mar 25 17:07:36 2014 -0700
@@ -35,7 +35,7 @@

 HS_MAJOR_VER=25
 HS_MINOR_VER=20
-HS_BUILD_NUMBER=05
+HS_BUILD_NUMBER=08

 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/src/cpu/sparc/vm/sparc.ad	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Tue Mar 25 17:07:36 2014 -0700
@@ -2071,19 +2071,6 @@
   return L7_REGP_mask();
 }

-const RegMask Matcher::mathExactI_result_proj_mask() {
-  return G1_REGI_mask();
-}
-
-const RegMask Matcher::mathExactL_result_proj_mask() {
-  return G1_REGL_mask();
-}
-
-const RegMask Matcher::mathExactI_flags_proj_mask() {
-  return INT_FLAGS_mask();
-}
-
-
 %}
--- a/src/cpu/x86/vm/assembler_x86.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1089,6 +1089,21 @@
   emit_arith(0x23, 0xC0, dst, src);
 }

+void Assembler::andnl(Register dst, Register src1, Register src2) {
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_0F38_and_encode(dst, src1, src2);
+  emit_int8((unsigned char)0xF2);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::andnl(Register dst, Register src1, Address src2) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  vex_prefix_0F38(dst, src1, src2);
+  emit_int8((unsigned char)0xF2);
+  emit_operand(dst, src2);
+}
+
 void Assembler::bsfl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
   emit_int8(0x0F);
@@ -1097,7 +1112,6 @@
 }

 void Assembler::bsrl(Register dst, Register src) {
-  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
   emit_int8(0x0F);
   emit_int8((unsigned char)0xBD);
@@ -1110,6 +1124,51 @@
   emit_int8((unsigned char)(0xC8 | encode));
 }

+void Assembler::blsil(Register dst, Register src) {
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_0F38_and_encode(rbx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::blsil(Register dst, Address src) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  vex_prefix_0F38(rbx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_operand(rbx, src);
+}
+
+void Assembler::blsmskl(Register dst, Register src) {
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_0F38_and_encode(rdx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::blsmskl(Register dst, Address src) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  vex_prefix_0F38(rdx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_operand(rdx, src);
+}
+
+void Assembler::blsrl(Register dst, Register src) {
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_0F38_and_encode(rcx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::blsrl(Register dst, Address src) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  vex_prefix_0F38(rcx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_operand(rcx, src);
+}
+
 void Assembler::call(Label& L, relocInfo::relocType rtype) {
   // suspect disp32 is always good
   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
@@ -2283,6 +2342,11 @@
   emit_int8(imm8);
 }

+void Assembler::pause() {
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)0x90);
+}
+
 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
   InstructionMark im(this);
@@ -2607,6 +2671,11 @@
   }
 }

+void Assembler::rdtsc() {
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0x31);
+}
+
 // copies data from [esi] to [edi] using rcx pointer sized words
 // generic
 void Assembler::rep_mov() {
@@ -2878,6 +2947,24 @@
   emit_operand(dst, src);
 }

+void Assembler::tzcntl(Register dst, Register src) {
+  assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
+  emit_int8((unsigned char)0xF3);
+  int encode = prefix_and_encode(dst->encoding(), src->encoding());
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBC);
+  emit_int8((unsigned char)0xC0 | encode);
+}
+
+void Assembler::tzcntq(Register dst, Register src) {
+  assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
+  emit_int8((unsigned char)0xF3);
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBC);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
 void Assembler::ucomisd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
@@ -2898,6 +2985,11 @@
   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
 }

+void Assembler::xabort(int8_t imm8) {
+  emit_int8((unsigned char)0xC6);
+  emit_int8((unsigned char)0xF8);
+  emit_int8((unsigned char)(imm8 & 0xFF));
+}

 void Assembler::xaddl(Address dst, Register src) {
   InstructionMark im(this);
@@ -2907,6 +2999,24 @@
   emit_operand(src, dst);
 }

+void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
+  InstructionMark im(this);
+  relocate(rtype);
+  if (abort.is_bound()) {
+    address entry = target(abort);
+    assert(entry != NULL, "abort entry NULL");
+    intptr_t offset = entry - pc();
+    emit_int8((unsigned char)0xC7);
+    emit_int8((unsigned char)0xF8);
+    emit_int32(offset - 6); // 2 opcode + 4 address
+  } else {
+    abort.add_patch_at(code(), locator());
+    emit_int8((unsigned char)0xC7);
+    emit_int8((unsigned char)0xF8);
+    emit_int32(0);
+  }
+}
+
 void Assembler::xchgl(Register dst, Address src) { // xchg
   InstructionMark im(this);
   prefix(src, dst);
@@ -2920,6 +3030,12 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }

+void Assembler::xend() {
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0x01);
+  emit_int8((unsigned char)0xD5);
+}
+
 void Assembler::xgetbv() {
   emit_int8(0x0F);
   emit_int8(0x01);
@@ -4837,6 +4953,21 @@
   emit_arith(0x23, 0xC0, dst, src);
 }

+void Assembler::andnq(Register dst, Register src1, Register src2) {
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2);
+  emit_int8((unsigned char)0xF2);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::andnq(Register dst, Register src1, Address src2) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  vex_prefix_0F38_q(dst, src1, src2);
+  emit_int8((unsigned char)0xF2);
+  emit_operand(dst, src2);
+}
+
 void Assembler::bsfq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
   emit_int8(0x0F);
@@ -4845,7 +4976,6 @@
 }

 void Assembler::bsrq(Register dst, Register src) {
-  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
   emit_int8(0x0F);
   emit_int8((unsigned char)0xBD);
@@ -4858,6 +4988,51 @@
   emit_int8((unsigned char)(0xC8 | encode));
 }

+void Assembler::blsiq(Register dst, Register src) {
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_0F38_and_encode_q(rbx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::blsiq(Register dst, Address src) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  vex_prefix_0F38_q(rbx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_operand(rbx, src);
+}
+
+void Assembler::blsmskq(Register dst, Register src) {
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_0F38_and_encode_q(rdx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::blsmskq(Register dst, Address src) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  vex_prefix_0F38_q(rdx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_operand(rdx, src);
+}
+
+void Assembler::blsrq(Register dst, Register src) {
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_0F38_and_encode_q(rcx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::blsrq(Register dst, Address src) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  vex_prefix_0F38_q(rcx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_operand(rcx, src);
+}
+
 void Assembler::cdqq() {
   prefix(REX_W);
   emit_int8((unsigned char)0x99);
--- a/src/cpu/x86/vm/assembler_x86.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -590,10 +590,35 @@
     vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);
   }

+  void vex_prefix_0F38(Register dst, Register nds, Address src) {
+    bool vex_w = false;
+    bool vector256 = false;
+    vex_prefix(src, nds->encoding(), dst->encoding(),
+               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
+  }
+
+  void vex_prefix_0F38_q(Register dst, Register nds, Address src) {
+    bool vex_w = true;
+    bool vector256 = false;
+    vex_prefix(src, nds->encoding(), dst->encoding(),
+               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
+  }
   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
                              VexSimdPrefix pre, VexOpcode opc,
                              bool vex_w, bool vector256);

+  int  vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) {
+    bool vex_w = false;
+    bool vector256 = false;
+    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
+                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
+  }
+  int  vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) {
+    bool vex_w = true;
+    bool vector256 = false;
+    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
+                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
+  }
   int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
                              VexSimdPrefix pre, bool vector256 = false,
                              VexOpcode opc = VEX_OPCODE_0F) {
@@ -897,6 +922,27 @@
   void andq(Register dst, Address src);
   void andq(Register dst, Register src);

+  // BMI instructions
+  void andnl(Register dst, Register src1, Register src2);
+  void andnl(Register dst, Register src1, Address src2);
+  void andnq(Register dst, Register src1, Register src2);
+  void andnq(Register dst, Register src1, Address src2);
+
+  void blsil(Register dst, Register src);
+  void blsil(Register dst, Address src);
+  void blsiq(Register dst, Register src);
+  void blsiq(Register dst, Address src);
+
+  void blsmskl(Register dst, Register src);
+  void blsmskl(Register dst, Address src);
+  void blsmskq(Register dst, Register src);
+  void blsmskq(Register dst, Address src);
+
+  void blsrl(Register dst, Register src);
+  void blsrl(Register dst, Address src);
+  void blsrq(Register dst, Register src);
+  void blsrq(Register dst, Address src);
+
   void bsfl(Register dst, Register src);
   void bsrl(Register dst, Register src);

@@ -1405,6 +1451,8 @@
   // Pemutation of 64bit words
   void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256);

+  void pause();
+
   // SSE4.2 string instructions
   void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
   void pcmpestri(XMMRegister xmm1, Address src, int imm8);
@@ -1489,6 +1537,8 @@

   void rclq(Register dst, int imm8);

+  void rdtsc();
+
   void ret(int imm16);

   void sahf();
@@ -1574,6 +1624,9 @@
   void testq(Register dst, int32_t imm32);
   void testq(Register dst, Register src);

+  // BMI - count trailing zeros
+  void tzcntl(Register dst, Register src);
+  void tzcntq(Register dst, Register src);

   // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
   void ucomisd(XMMRegister dst, Address src);
@@ -1583,16 +1636,22 @@
   void ucomiss(XMMRegister dst, Address src);
   void ucomiss(XMMRegister dst, XMMRegister src);

+  void xabort(int8_t imm8);
+
   void xaddl(Address dst, Register src);

   void xaddq(Address dst, Register src);

+  void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);
+
   void xchgl(Register reg, Address adr);
   void xchgl(Register dst, Register src);

   void xchgq(Register reg, Address adr);
   void xchgq(Register dst, Register src);

+  void xend();
+
   // Get Value of Extended Control Register
   void xgetbv();
--- a/src/cpu/x86/vm/globals_x86.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/cpu/x86/vm/globals_x86.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -129,11 +129,53 @@
   product(bool, UseFastStosb, false,                                        \
           "Use fast-string operation for zeroing: rep stosb")               \
                                                                             \
+  /* Use Restricted Transactional Memory for lock eliding */                \
+  experimental(bool, UseRTMLocking, false,                                  \
+          "Enable RTM lock eliding for inflated locks in compiled code")    \
+                                                                            \
+  experimental(bool, UseRTMForStackLocks, false,                            \
+          "Enable RTM lock eliding for stack locks in compiled code")       \
+                                                                            \
+  experimental(bool, UseRTMDeopt, false,                                    \
+          "Perform deopt and recompilation based on RTM abort ratio")       \
+                                                                            \
+  experimental(uintx, RTMRetryCount, 5,                                     \
+          "Number of RTM retries on lock abort or busy")                    \
+                                                                            \
+  experimental(intx, RTMSpinLoopCount, 100,                                 \
+          "Spin count for lock to become free before RTM retry")            \
+                                                                            \
+  experimental(intx, RTMAbortThreshold, 1000,                               \
+          "Calculate abort ratio after this number of aborts")              \
+                                                                            \
+  experimental(intx, RTMLockingThreshold, 10000,                            \
+          "Lock count at which to do RTM lock eliding without "             \
+          "abort ratio calculation")                                        \
+                                                                            \
+  experimental(intx, RTMAbortRatio, 50,                                     \
+          "Lock abort ratio at which to stop use RTM lock eliding")         \
+                                                                            \
+  experimental(intx, RTMTotalCountIncrRate, 64,                             \
+          "Increment total RTM attempted lock count once every n times")    \
+                                                                            \
+  experimental(intx, RTMLockingCalculationDelay, 0,                         \
+          "Number of milliseconds to wait before start calculating aborts " \
+          "for RTM locking")                                                \
+                                                                            \
+  experimental(bool, UseRTMXendForLockBusy, false,                          \
+          "Use RTM Xend instead of Xabort when lock busy")                  \
+                                                                            \
   /* assembler */                                                           \
   product(bool, Use486InstrsOnly, false,                                    \
           "Use 80486 Compliant instruction subset")                         \
                                                                             \
   product(bool, UseCountLeadingZerosInstruction, false,                     \
           "Use count leading zeros instruction")                            \
+                                                                            \
+  product(bool, UseCountTrailingZerosInstruction, false,                    \
+          "Use count trailing zeros instruction")                           \
+                                                                            \
+  product(bool, UseBMI1Instructions, false,                                 \
+          "Use BMI instructions")

 #endif // CPU_X86_VM_GLOBALS_X86_HPP
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -98,217 +98,6 @@
   return Address::make_array(adr);
 }

-int MacroAssembler::biased_locking_enter(Register lock_reg,
-                                         Register obj_reg,
-                                         Register swap_reg,
-                                         Register tmp_reg,
-                                         bool swap_reg_contains_mark,
-                                         Label& done,
-                                         Label* slow_case,
-                                         BiasedLockingCounters* counters) {
-  assert(UseBiasedLocking, "why call this otherwise?");
-  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
-  assert_different_registers(lock_reg, obj_reg, swap_reg);
-
-  if (PrintBiasedLockingStatistics && counters == NULL)
-    counters = BiasedLocking::counters();
-
-  bool need_tmp_reg = false;
-  if (tmp_reg == noreg) {
-    need_tmp_reg = true;
-    tmp_reg = lock_reg;
-  } else {
-    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
-  }
-  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
-  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
-  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
-  Address saved_mark_addr(lock_reg, 0);
-
-  // Biased locking
-  // See whether the lock is currently biased toward our thread and
-  // whether the epoch is still valid
-  // Note that the runtime guarantees sufficient alignment of JavaThread
-  // pointers to allow age to be placed into low bits
-  // First check to see whether biasing is even enabled for this object
-  Label cas_label;
-  int null_check_offset = -1;
-  if (!swap_reg_contains_mark) {
-    null_check_offset = offset();
-    movl(swap_reg, mark_addr);
-  }
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  movl(tmp_reg, swap_reg);
-  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
-  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  jcc(Assembler::notEqual, cas_label);
-  // The bias pattern is present in the object's header. Need to check
-  // whether the bias owner and the epoch are both still current.
-  // Note that because there is no current thread register on x86 we
-  // need to store off the mark word we read out of the object to
-  // avoid reloading it and needing to recheck invariants below. This
-  // store is unfortunate but it makes the overall code shorter and
-  // simpler.
-  movl(saved_mark_addr, swap_reg);
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  get_thread(tmp_reg);
-  xorl(swap_reg, tmp_reg);
-  if (swap_reg_contains_mark) {
-    null_check_offset = offset();
-  }
-  movl(tmp_reg, klass_addr);
-  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
-  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
-  }
-  jcc(Assembler::equal, done);
-
-  Label try_revoke_bias;
-  Label try_rebias;
-
-  // At this point we know that the header has the bias pattern and
-  // that we are not the bias owner in the current epoch. We need to
-  // figure out more details about the state of the header in order to
-  // know what operations can be legally performed on the object's
-  // header.
-
-  // If the low three bits in the xor result aren't clear, that means
-  // the prototype header is no longer biased and we have to revoke
-  // the bias on this object.
-  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
-  jcc(Assembler::notZero, try_revoke_bias);
-
-  // Biasing is still enabled for this data type. See whether the
-  // epoch of the current bias is still valid, meaning that the epoch
-  // bits of the mark word are equal to the epoch bits of the
-  // prototype header. (Note that the prototype header's epoch bits
-  // only change at a safepoint.) If not, attempt to rebias the object
-  // toward the current thread. Note that we must be absolutely sure
-  // that the current epoch is invalid in order to do this because
-  // otherwise the manipulations it performs on the mark word are
-  // illegal.
-  testl(swap_reg, markOopDesc::epoch_mask_in_place);
-  jcc(Assembler::notZero, try_rebias);
-
-  // The epoch of the current bias is still valid but we know nothing
-  // about the owner; it might be set or it might be clear. Try to
-  // acquire the bias of the object using an atomic operation. If this
-  // fails we will go in to the runtime to revoke the object's bias.
-  // Note that we first construct the presumed unbiased header so we
-  // don't accidentally blow away another thread's valid bias.
-  movl(swap_reg, saved_mark_addr);
-  andl(swap_reg,
-       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  get_thread(tmp_reg);
-  orl(tmp_reg, swap_reg);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  // If the biasing toward our thread failed, this means that
-  // another thread succeeded in biasing it toward itself and we
-  // need to revoke that bias. The revocation will occur in the
-  // interpreter runtime in the slow case.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
-  }
-  if (slow_case != NULL) {
-    jcc(Assembler::notZero, *slow_case);
-  }
-  jmp(done);
-
-  bind(try_rebias);
-  // At this point we know the epoch has expired, meaning that the
-  // current "bias owner", if any, is actually invalid. Under these
-  // circumstances _only_, we are allowed to use the current header's
-  // value as the comparison value when doing the cas to acquire the
-  // bias in the current epoch. In other words, we allow transfer of
-  // the bias from one thread to another directly in this situation.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  get_thread(tmp_reg);
-  movl(swap_reg, klass_addr);
-  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
-  movl(swap_reg, saved_mark_addr);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  // If the biasing toward our thread failed, then another thread
-  // succeeded in biasing it toward itself and we need to revoke that
-  // bias. The revocation will occur in the runtime in the slow case.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
-  }
-  if (slow_case != NULL) {
-    jcc(Assembler::notZero, *slow_case);
-  }
-  jmp(done);
-
-  bind(try_revoke_bias);
-  // The prototype mark in the klass doesn't have the bias bit set any
-  // more, indicating that objects of this data type are not supposed
-  // to be biased any more. We are going to try to reset the mark of
-  // this object to the prototype value and fall through to the
-  // CAS-based locking scheme. Note that if our CAS fails, it means
-  // that another thread raced us for the privilege of revoking the
-  // bias of this particular object, so it's okay to continue in the
-  // normal locking code.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  movl(swap_reg, saved_mark_addr);
-  if (need_tmp_reg) {
-    push(tmp_reg);
-  }
-  movl(tmp_reg, klass_addr);
-  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
-  if (need_tmp_reg) {
-    pop(tmp_reg);
-  }
-  // Fall through to the normal CAS-based lock, because no matter what
-  // the result of the above CAS, some thread must have succeeded in
-  // removing the bias bit from the object's header.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
-  }
-
-  bind(cas_label);
-
-  return null_check_offset;
-}
 void MacroAssembler::call_VM_leaf_base(address entry_point,
                                        int number_of_arguments) {
   call(RuntimeAddress(entry_point));
@@ -512,7 +301,9 @@
   mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 }

-void MacroAssembler::movptr(Register dst, AddressLiteral src) {
+void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
+  // scratch register is not used,
+  // it is defined to match parameters of 64-bit version of this method.
   if (src.is_lval()) {
     mov_literal32(dst, (intptr_t)src.target(), src.rspec());
   } else {
@@ -726,165 +517,6 @@
   return array;
 }

-int MacroAssembler::biased_locking_enter(Register lock_reg,
-                                         Register obj_reg,
-                                         Register swap_reg,
-                                         Register tmp_reg,
-                                         bool swap_reg_contains_mark,
-                                         Label& done,
-                                         Label* slow_case,
-                                         BiasedLockingCounters* counters) {
-  assert(UseBiasedLocking, "why call this otherwise?");
-  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
-  assert(tmp_reg != noreg, "tmp_reg must be supplied");
-  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
-  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
-  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
-  Address saved_mark_addr(lock_reg, 0);
-
-  if (PrintBiasedLockingStatistics && counters == NULL)
-    counters = BiasedLocking::counters();
-
-  // Biased locking
-  // See whether the lock is currently biased toward our thread and
-  // whether the epoch is still valid
-  // Note that the runtime guarantees sufficient alignment of JavaThread
-  // pointers to allow age to be placed into low bits
-  // First check to see whether biasing is even enabled for this object
-  Label cas_label;
-  int null_check_offset = -1;
-  if (!swap_reg_contains_mark) {
-    null_check_offset = offset();
-    movq(swap_reg, mark_addr);
-  }
-  movq(tmp_reg, swap_reg);
-  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
-  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
-  jcc(Assembler::notEqual, cas_label);
-  // The bias pattern is present in the object's header. Need to check
-  // whether the bias owner and the epoch are both still current.
-  load_prototype_header(tmp_reg, obj_reg);
-  orq(tmp_reg, r15_thread);
-  xorq(tmp_reg, swap_reg);
-  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
-  }
-  jcc(Assembler::equal, done);
-
-  Label try_revoke_bias;
-  Label try_rebias;
-
-  // At this point we know that the header has the bias pattern and
-  // that we are not the bias owner in the current epoch. We need to
-  // figure out more details about the state of the header in order to
-  // know what operations can be legally performed on the object's
-  // header.
-
-  // If the low three bits in the xor result aren't clear, that means
-  // the prototype header is no longer biased and we have to revoke
-  // the bias on this object.
-  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
-  jcc(Assembler::notZero, try_revoke_bias);
-
-  // Biasing is still enabled for this data type. See whether the
-  // epoch of the current bias is still valid, meaning that the epoch
-  // bits of the mark word are equal to the epoch bits of the
-  // prototype header. (Note that the prototype header's epoch bits
-  // only change at a safepoint.) If not, attempt to rebias the object
-  // toward the current thread. Note that we must be absolutely sure
-  // that the current epoch is invalid in order to do this because
-  // otherwise the manipulations it performs on the mark word are
-  // illegal.
-  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
-  jcc(Assembler::notZero, try_rebias);
-
-  // The epoch of the current bias is still valid but we know nothing
-  // about the owner; it might be set or it might be clear. Try to
-  // acquire the bias of the object using an atomic operation. If this
-  // fails we will go in to the runtime to revoke the object's bias.
-  // Note that we first construct the presumed unbiased header so we
-  // don't accidentally blow away another thread's valid bias.
-  andq(swap_reg,
-       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
-  movq(tmp_reg, swap_reg);
-  orq(tmp_reg, r15_thread);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgq(tmp_reg, Address(obj_reg, 0));
-  // If the biasing toward our thread failed, this means that
-  // another thread succeeded in biasing it toward itself and we
-  // need to revoke that bias. The revocation will occur in the
-  // interpreter runtime in the slow case.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
-  }
-  if (slow_case != NULL) {
-    jcc(Assembler::notZero, *slow_case);
-  }
-  jmp(done);
-
-  bind(try_rebias);
-  // At this point we know the epoch has expired, meaning that the
-  // current "bias owner", if any, is actually invalid. Under these
-  // circumstances _only_, we are allowed to use the current header's
-  // value as the comparison value when doing the cas to acquire the
-  // bias in the current epoch. In other words, we allow transfer of
-  // the bias from one thread to another directly in this situation.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  load_prototype_header(tmp_reg, obj_reg);
-  orq(tmp_reg, r15_thread);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgq(tmp_reg, Address(obj_reg, 0));
-  // If the biasing toward our thread failed, then another thread
-  // succeeded in biasing it toward itself and we need to revoke that
-  // bias. The revocation will occur in the runtime in the slow case.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
-  }
-  if (slow_case != NULL) {
-    jcc(Assembler::notZero, *slow_case);
-  }
-  jmp(done);
-
-  bind(try_revoke_bias);
-  // The prototype mark in the klass doesn't have the bias bit set any
-  // more, indicating that objects of this data type are not supposed
-  // to be biased any more. We are going to try to reset the mark of
-  // this object to the prototype value and fall through to the
-  // CAS-based locking scheme. Note that if our CAS fails, it means
-  // that another thread raced us for the privilege of revoking the
-  // bias of this particular object, so it's okay to continue in the
-  // normal locking code.
-  //
-  // FIXME: due to a lack of registers we currently blow away the age
-  // bits in this situation. Should attempt to preserve them.
-  load_prototype_header(tmp_reg, obj_reg);
-  if (os::is_MP()) {
-    lock();
-  }
-  cmpxchgq(tmp_reg, Address(obj_reg, 0));
-  // Fall through to the normal CAS-based lock, because no matter what
-  // the result of the above CAS, some thread must have succeeded in
-  // removing the bias bit from the object's header.
-  if (counters != NULL) {
-    cond_inc32(Assembler::zero,
-               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
-  }
-
-  bind(cas_label);
-
-  return null_check_offset;
-}
-
 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
   Label L, E;

@@ -983,6 +615,15 @@
   /* else */      { subq(dst, value)       ; return; }
 }

+void MacroAssembler::incrementq(AddressLiteral dst) {
+  if (reachable(dst)) {
+    incrementq(as_Address(dst));
+  } else {
+    lea(rscratch1, dst);
+    incrementq(Address(rscratch1, 0));
+  }
+}
+
 void MacroAssembler::incrementq(Register reg, int value) {
   if (value == min_jint) { addq(reg, value); return; }
   if (value <  0) { decrementq(reg, -value); return; }
@@ -1051,15 +692,15 @@
   movq(dst, rscratch1);
 }

-void MacroAssembler::movptr(Register dst, AddressLiteral src) {
+void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
   if (src.is_lval()) {
     mov_literal64(dst, (intptr_t)src.target(), src.rspec());
   } else {
     if (reachable(src)) {
       movq(dst, as_Address(src));
     } else {
-      lea(rscratch1, src);
-      movq(dst, Address(rscratch1,0));
+      lea(scratch, src);
+      movq(dst, Address(scratch, 0));
     }
   }
 }
@@ -1358,13 +999,37 @@
   LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
 }

-void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
-  pushf();
+void MacroAssembler::atomic_incl(Address counter_addr) {
   if (os::is_MP())
     lock();
   incrementl(counter_addr);
-  popf();
-}
+}
+
+void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register scr) {
+  if (reachable(counter_addr)) {
+    atomic_incl(as_Address(counter_addr));
+  } else {
+    lea(scr, counter_addr);
+    atomic_incl(Address(scr, 0));
+  }
+}
+
+#ifdef _LP64
+void MacroAssembler::atomic_incq(Address counter_addr) {
+  if (os::is_MP())
+    lock();
+  incrementq(counter_addr);
+}
+
+void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register scr) {
+  if (reachable(counter_addr)) {
+    atomic_incq(as_Address(counter_addr));
+  } else {
+    lea(scr, counter_addr);
+    atomic_incq(Address(scr, 0));
+  }
+}
+#endif

 // Writes to stack successive pages until offset reached to check for
 // stack overflow + shadow pages.  This clobbers tmp.
@@ -1393,6 +1058,234 @@
   }
 }

+int MacroAssembler::biased_locking_enter(Register lock_reg,
+                                         Register obj_reg,
+                                         Register swap_reg,
+                                         Register tmp_reg,
+                                         bool swap_reg_contains_mark,
+                                         Label& done,
+                                         Label* slow_case,
+                                         BiasedLockingCounters* counters) {
+  assert(UseBiasedLocking, "why call this otherwise?");
+  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
+  LP64_ONLY( assert(tmp_reg != noreg, "tmp_reg must be supplied"); )
+  bool need_tmp_reg = false;
+  if (tmp_reg == noreg) {
+    need_tmp_reg = true;
+    tmp_reg = lock_reg;
+    assert_different_registers(lock_reg, obj_reg, swap_reg);
+  } else {
+    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
+  }
+  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
+  Address saved_mark_addr(lock_reg, 0);
+
+  if (PrintBiasedLockingStatistics && counters == NULL) {
+    counters = BiasedLocking::counters();
+  }
+  // Biased locking
+  // See whether the lock is currently biased toward our thread and
+  // whether the epoch is still valid
+  // Note that the runtime guarantees sufficient alignment of JavaThread
+  // pointers to allow age to be placed into low bits
+  // First check to see whether biasing is even enabled for this object
+  Label cas_label;
+  int null_check_offset = -1;
+  if (!swap_reg_contains_mark) {
+    null_check_offset = offset();
+    movptr(swap_reg, mark_addr);
+  }
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  movptr(tmp_reg, swap_reg);
+  andptr(tmp_reg, markOopDesc::biased_lock_mask_in_place);
+  cmpptr(tmp_reg, markOopDesc::biased_lock_pattern);
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  jcc(Assembler::notEqual, cas_label);
+  // The bias pattern is present in the object's header. Need to check
+  // whether the bias owner and the epoch are both still current.
+#ifndef _LP64
+  // Note that because there is no current thread register on x86_32 we
+  // need to store off the mark word we read out of the object to
+  // avoid reloading it and needing to recheck invariants below. This
+  // store is unfortunate but it makes the overall code shorter and
+  // simpler.
+  movptr(saved_mark_addr, swap_reg);
+#endif
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  if (swap_reg_contains_mark) {
+    null_check_offset = offset();
+  }
+  load_prototype_header(tmp_reg, obj_reg);
+#ifdef _LP64
+  orptr(tmp_reg, r15_thread);
+  xorptr(tmp_reg, swap_reg);
+  Register header_reg = tmp_reg;
+#else
+  xorptr(tmp_reg, swap_reg);
+  get_thread(swap_reg);
+  xorptr(swap_reg, tmp_reg);
+  Register header_reg = swap_reg;
+#endif
+  andptr(header_reg, ~((int) markOopDesc::age_mask_in_place));
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->biased_lock_entry_count_addr()));
+  }
+  jcc(Assembler::equal, done);
+
+  Label try_revoke_bias;
+  Label try_rebias;
+
+  // At this point we know that the header has the bias pattern and
+  // that we are not the bias owner in the current epoch. We need to
+  // figure out more details about the state of the header in order to
+  // know what operations can be legally performed on the object's
+  // header.
+
+  // If the low three bits in the xor result aren't clear, that means
+  // the prototype header is no longer biased and we have to revoke
+  // the bias on this object.
+  testptr(header_reg, markOopDesc::biased_lock_mask_in_place);
+  jccb(Assembler::notZero, try_revoke_bias);
+
+  // Biasing is still enabled for this data type. See whether the
+  // epoch of the current bias is still valid, meaning that the epoch
+  // bits of the mark word are equal to the epoch bits of the
+  // prototype header. (Note that the prototype header's epoch bits
+  // only change at a safepoint.) If not, attempt to rebias the object
+  // toward the current thread. Note that we must be absolutely sure
+  // that the current epoch is invalid in order to do this because
+  // otherwise the manipulations it performs on the mark word are
+  // illegal.
+  testptr(header_reg, markOopDesc::epoch_mask_in_place);
+  jccb(Assembler::notZero, try_rebias);
+
+  // The epoch of the current bias is still valid but we know nothing
+  // about the owner; it might be set or it might be clear. Try to
+  // acquire the bias of the object using an atomic operation. If this
+  // fails we will go in to the runtime to revoke the object's bias.
+  // Note that we first construct the presumed unbiased header so we
+  // don't accidentally blow away another thread's valid bias.
+  NOT_LP64( movptr(swap_reg, saved_mark_addr); )
+  andptr(swap_reg,
+         markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+#ifdef _LP64
+  movptr(tmp_reg, swap_reg);
+  orptr(tmp_reg, r15_thread);
+#else
+  get_thread(tmp_reg);
+  orptr(tmp_reg, swap_reg);
+#endif
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  // If the biasing toward our thread failed, this means that
+  // another thread succeeded in biasing it toward itself and we
+  // need to revoke that bias. The revocation will occur in the
+  // interpreter runtime in the slow case.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
+  }
+  if (slow_case != NULL) {
+    jcc(Assembler::notZero, *slow_case);
+  }
+  jmp(done);
+
+  bind(try_rebias);
+  // At this point we know the epoch has expired, meaning that the
+  // current "bias owner", if any, is actually invalid. Under these
+  // circumstances _only_, we are allowed to use the current header's
+  // value as the comparison value when doing the cas to acquire the
+  // bias in the current epoch. In other words, we allow transfer of
+  // the bias from one thread to another directly in this situation.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  load_prototype_header(tmp_reg, obj_reg);
+#ifdef _LP64
+  orptr(tmp_reg, r15_thread);
+#else
+  get_thread(swap_reg);
+  orptr(tmp_reg, swap_reg);
+  movptr(swap_reg, saved_mark_addr);
+#endif
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  // If the biasing toward our thread failed, then another thread
+  // succeeded in biasing it toward itself and we need to revoke that
+  // bias. The revocation will occur in the runtime in the slow case.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
+  }
+  if (slow_case != NULL) {
+    jcc(Assembler::notZero, *slow_case);
+  }
+  jmp(done);
+
+  bind(try_revoke_bias);
+  // The prototype mark in the klass doesn't have the bias bit set any
+  // more, indicating that objects of this data type are not supposed
+  // to be biased any more. We are going to try to reset the mark of
+  // this object to the prototype value and fall through to the
+  // CAS-based locking scheme. Note that if our CAS fails, it means
+  // that another thread raced us for the privilege of revoking the
+  // bias of this particular object, so it's okay to continue in the
+  // normal locking code.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  NOT_LP64( movptr(swap_reg, saved_mark_addr); )
+  if (need_tmp_reg) {
+    push(tmp_reg);
+  }
+  load_prototype_header(tmp_reg, obj_reg);
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
+  if (need_tmp_reg) {
+    pop(tmp_reg);
+  }
+  // Fall through to the normal CAS-based lock, because no matter what
+  // the result of the above CAS, some thread must have succeeded in
+  // removing the bias bit from the object's header.
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
+  }
+
+  bind(cas_label);
+
+  return null_check_offset;
+}
+
 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
   assert(UseBiasedLocking, "why call this otherwise?");

@@ -1408,6 +1301,996 @@
   jcc(Assembler::equal, done);
 }

+#ifdef COMPILER2
+
+#if INCLUDE_RTM_OPT
+
+// Update rtm_counters based on abort status
+// input: abort_status
+//        rtm_counters (RTMLockingCounters*)
+// flags are killed
+void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
+
+  atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
+  if (PrintPreciseRTMLockingStatistics) {
+    for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
+      Label check_abort;
+      testl(abort_status, (1<<i));
+      jccb(Assembler::equal, check_abort);
+      atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx))));
+      bind(check_abort);
+    }
+  }
+}
+
+// Branch if (random & (count-1) != 0), count is 2^n
+// tmp, scr and flags are killed
+void MacroAssembler::branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel) {
+  assert(tmp == rax, "");
+  assert(scr == rdx, "");
+  rdtsc(); // modifies EDX:EAX
+  andptr(tmp, count-1);
+  jccb(Assembler::notZero, brLabel);
+}
+
+// Perform abort ratio calculation, set no_rtm bit if high ratio
+// input:  rtm_counters_Reg (RTMLockingCounters* address)
+// tmpReg, rtm_counters_Reg and flags are killed
+void MacroAssembler::rtm_abort_ratio_calculation(Register tmpReg,
+                                                 Register rtm_counters_Reg,
+                                                 RTMLockingCounters* rtm_counters,
+                                                 Metadata* method_data) {
+  Label L_done, L_check_always_rtm1, L_check_always_rtm2;
+
+  if (RTMLockingCalculationDelay > 0) {
+    // Delay calculation
+    movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr()), tmpReg);
+    testptr(tmpReg, tmpReg);
+    jccb(Assembler::equal, L_done);
+  }
+  // Abort ratio calculation only if abort_count > RTMAbortThreshold
+  //   Aborted transactions = abort_count * 100
+  //   All transactions = total_count *  RTMTotalCountIncrRate
+  //   Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
+
+  movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::abort_count_offset()));
+  cmpptr(tmpReg, RTMAbortThreshold);
+  jccb(Assembler::below, L_check_always_rtm2);
+  imulptr(tmpReg, tmpReg, 100);
+
+  Register scrReg = rtm_counters_Reg;
+  movptr(scrReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
+  imulptr(scrReg, scrReg, RTMTotalCountIncrRate);
+  imulptr(scrReg, scrReg, RTMAbortRatio);
+  cmpptr(tmpReg, scrReg);
+  jccb(Assembler::below, L_check_always_rtm1);
+  if (method_data != NULL) {
+    // set rtm_state to "no rtm" in MDO
+    mov_metadata(tmpReg, method_data);
+    if (os::is_MP()) {
+      lock();
+    }
+    orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), NoRTM);
+  }
+  jmpb(L_done);
+  bind(L_check_always_rtm1);
+  // Reload RTMLockingCounters* address
+  lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
+  bind(L_check_always_rtm2);
+  movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
+  cmpptr(tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
+  jccb(Assembler::below, L_done);
+  if (method_data != NULL) {
+    // set rtm_state to "always rtm" in MDO
+    mov_metadata(tmpReg, method_data);
+    if (os::is_MP()) {
+      lock();
+    }
+    orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), UseRTM);
+  }
+  bind(L_done);
+}
+
+// Update counters and perform abort ratio calculation
+// input:  abort_status_Reg
+// rtm_counters_Reg, flags are killed
+void MacroAssembler::rtm_profiling(Register abort_status_Reg,
+                                   Register rtm_counters_Reg,
+                                   RTMLockingCounters* rtm_counters,
+                                   Metadata* method_data,
+                                   bool profile_rtm) {
+
+  assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+  // update rtm counters based on rax value at abort
+  // reads abort_status_Reg, updates flags
+  lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
+  rtm_counters_update(abort_status_Reg, rtm_counters_Reg);
+  if (profile_rtm) {
+    // Save abort status because abort_status_Reg is used by following code.
+    if (RTMRetryCount > 0) {
+      push(abort_status_Reg);
+    }
+    assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+    rtm_abort_ratio_calculation(abort_status_Reg, rtm_counters_Reg, rtm_counters, method_data);
+    // restore abort status
+    if (RTMRetryCount > 0) {
+      pop(abort_status_Reg);
+    }
+  }
+}
+
+// Retry on abort if abort's status is 0x6: can retry (0x2) | memory conflict (0x4)
+// inputs: retry_count_Reg
+//       : abort_status_Reg
+// output: retry_count_Reg decremented by 1
+// flags are killed
+void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg, Label& retryLabel) {
+  Label doneRetry;
+  assert(abort_status_Reg == rax, "");
+  // The abort reason bits are in eax (see all states in rtmLocking.hpp)
+  // 0x6 = conflict on which we can retry (0x2) | memory conflict (0x4)
+  // if reason is in 0x6 and retry count != 0 then retry
+  andptr(abort_status_Reg, 0x6);
+  jccb(Assembler::zero, doneRetry);
+  testl(retry_count_Reg, retry_count_Reg);
+  jccb(Assembler::zero, doneRetry);
+  pause();
+  decrementl(retry_count_Reg);
+  jmp(retryLabel);
+  bind(doneRetry);
+}
+
+// Spin and retry if lock is busy,
+// inputs: box_Reg (monitor address)
+//       : retry_count_Reg
+// output: retry_count_Reg decremented by 1
+//       : clear z flag if retry count exceeded
+// tmp_Reg, scr_Reg, flags are killed
+void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register box_Reg,
+                                            Register tmp_Reg, Register scr_Reg, Label& retryLabel) {
+  Label SpinLoop, SpinExit, doneRetry;
+  // Clean monitor_value bit to get valid pointer
+  int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
+
+  testl(retry_count_Reg, retry_count_Reg);
+  jccb(Assembler::zero, doneRetry);
+  decrementl(retry_count_Reg);
+  movptr(scr_Reg, RTMSpinLoopCount);
+
+  bind(SpinLoop);
+  pause();
+  decrementl(scr_Reg);
+  jccb(Assembler::lessEqual, SpinExit);
+  movptr(tmp_Reg, Address(box_Reg, owner_offset));
+  testptr(tmp_Reg, tmp_Reg);
+  jccb(Assembler::notZero, SpinLoop);
+
+  bind(SpinExit);
+  jmp(retryLabel);
+  bind(doneRetry);
+  incrementl(retry_count_Reg); // clear z flag
+}
+
+// Use RTM for normal stack locks
+// Input: objReg (object to lock)
+void MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg,
+                                       Register retry_on_abort_count_Reg,
+                                       RTMLockingCounters* stack_rtm_counters,
+                                       Metadata* method_data, bool profile_rtm,
+                                       Label& DONE_LABEL, Label& IsInflated) {
+  assert(UseRTMForStackLocks, "why call this otherwise?");
+  assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
+  assert(tmpReg == rax, "");
+  assert(scrReg == rdx, "");
+  Label L_rtm_retry, L_decrement_retry, L_on_abort;
+
+  if (RTMRetryCount > 0) {
+    movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
+    bind(L_rtm_retry);
+  }
+  if (!UseRTMXendForLockBusy) {
+    movptr(tmpReg, Address(objReg, 0));
+    testptr(tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
+    jcc(Assembler::notZero, IsInflated);
+  }
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    Label L_noincrement;
+    if (RTMTotalCountIncrRate > 1) {
+      // tmpReg, scrReg and flags are killed
+      branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
+    }
+    assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
+    atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg);
+    bind(L_noincrement);
+  }
+  xbegin(L_on_abort);
+  movptr(tmpReg, Address(objReg, 0));       // fetch markword
+  andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
+  cmpptr(tmpReg, markOopDesc::unlocked_value);            // bits = 001 unlocked
+  jcc(Assembler::equal, DONE_LABEL);        // all done if unlocked
+
+  Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
+  if (UseRTMXendForLockBusy) {
+    xend();
+    movptr(tmpReg, Address(objReg, 0));
+    testptr(tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
+    jcc(Assembler::notZero, IsInflated);
+    movptr(abort_status_Reg, 0x1);                // Set the abort status to 1 (as xabort does)
+    jmp(L_decrement_retry);
+  }
+  else {
+    xabort(0);
+  }
+  bind(L_on_abort);
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    rtm_profiling(abort_status_Reg, scrReg, stack_rtm_counters, method_data, profile_rtm);
+  }
+  bind(L_decrement_retry);
+  if (RTMRetryCount > 0) {
+    // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
+    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
+  }
+}
+
+// Use RTM for inflating locks
+// inputs: objReg (object to lock)
+//         boxReg (on-stack box address (displaced header location) - KILLED)
+//         tmpReg (ObjectMonitor address + 2(monitor_value))
+void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
+                                          Register scrReg, Register retry_on_busy_count_Reg,
+                                          Register retry_on_abort_count_Reg,
+                                          RTMLockingCounters* rtm_counters,
+                                          Metadata* method_data, bool profile_rtm,
+                                          Label& DONE_LABEL) {
+  assert(UseRTMLocking, "why call this otherwise?");
+  assert(tmpReg == rax, "");
+  assert(scrReg == rdx, "");
+  Label L_rtm_retry, L_decrement_retry, L_on_abort;
+  // Clean monitor_value bit to get valid pointer
+  int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
+
+  // Without cast to int32_t a movptr will destroy r10 which is typically obj
+  movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
+  movptr(boxReg, tmpReg); // Save ObjectMonitor address
+
+  if (RTMRetryCount > 0) {
+    movl(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy
+    movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
+    bind(L_rtm_retry);
+  }
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    Label L_noincrement;
+    if (RTMTotalCountIncrRate > 1) {
+      // tmpReg, scrReg and flags are killed
+      branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
+    }
+    assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+    atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
+    bind(L_noincrement);
+  }
+  xbegin(L_on_abort);
+  movptr(tmpReg, Address(objReg, 0));
+  movptr(tmpReg, Address(tmpReg, owner_offset));
+  testptr(tmpReg, tmpReg);
+  jcc(Assembler::zero, DONE_LABEL);
+  if (UseRTMXendForLockBusy) {
+    xend();
+    jmp(L_decrement_retry);
+  }
+  else {
+    xabort(0);
+  }
+  bind(L_on_abort);
+  Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
+  }
+  if (RTMRetryCount > 0) {
+    // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
+    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
+  }
+
+  movptr(tmpReg, Address(boxReg, owner_offset)) ;
+  testptr(tmpReg, tmpReg) ;
+  jccb(Assembler::notZero, L_decrement_retry) ;
+
+  // Appears unlocked - try to swing _owner from null to non-null.
+  // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
+#ifdef _LP64
+  Register threadReg = r15_thread;
+#else
+  get_thread(scrReg);
+  Register threadReg = scrReg;
+#endif
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
+
+  if (RTMRetryCount > 0) {
+    // success done else retry
+    jccb(Assembler::equal, DONE_LABEL) ;
+    bind(L_decrement_retry);
+    // Spin and retry if lock is busy.
+    rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
+  }
+  else {
+    bind(L_decrement_retry);
+  }
+}
+
+#endif //  INCLUDE_RTM_OPT
+
+// Fast_Lock and Fast_Unlock used by C2
+
+// Because the transitions from emitted code to the runtime
+// monitorenter/exit helper stubs are so slow it's critical that
+// we inline both the stack-locking fast-path and the inflated fast path.
+//
+// See also: cmpFastLock and cmpFastUnlock.
+//
+// What follows is a specialized inline transliteration of the code
+// in slow_enter() and slow_exit().  If we're concerned about I$ bloat
+// another option would be to emit TrySlowEnter and TrySlowExit methods
+// at startup-time.  These methods would accept arguments as
+// (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
+// indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
+// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
+// In practice, however, the # of lock sites is bounded and is usually small.
+// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
+// if the processor uses simple bimodal branch predictors keyed by EIP
+// Since the helper routines would be called from multiple synchronization
+// sites.
+//
+// An even better approach would be write "MonitorEnter()" and "MonitorExit()"
+// in java - using j.u.c and unsafe - and just bind the lock and unlock sites
+// to those specialized methods.  That'd give us a mostly platform-independent
+// implementation that the JITs could optimize and inline at their pleasure.
+// Done correctly, the only time we'd need to cross to native could would be
+// to park() or unpark() threads.  We'd also need a few more unsafe operators
+// to (a) prevent compiler-JIT reordering of non-volatile accesses, and
+// (b) explicit barriers or fence operations.
+//
+// TODO:
+//
+// *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
+//    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
+//    Given TLAB allocation, Self is usually manifested in a register, so passing it into
+//    the lock operators would typically be faster than reifying Self.
+//
+// *  Ideally I'd define the primitives as:
+//       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
+//       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
+//    Unfortunately ADLC bugs prevent us from expressing the ideal form.
+//    Instead, we're stuck with a rather awkward and brittle register assignments below.
+//    Furthermore the register assignments are overconstrained, possibly resulting in
+//    sub-optimal code near the synchronization site.
+//
+// *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
+//    Alternately, use a better sp-proximity test.
+//
+// *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
+//    Either one is sufficient to uniquely identify a thread.
+//    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
+//
+// *  Intrinsify notify() and notifyAll() for the common cases where the
+//    object is locked by the calling thread but the waitlist is empty.
+//    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
+//
+// *  use jccb and jmpb instead of jcc and jmp to improve code density.
+//    But beware of excessive branch density on AMD Opterons.
+//
+// *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
+//    or failure of the fast-path.  If the fast-path fails then we pass
+//    control to the slow-path, typically in C.  In Fast_Lock and
+//    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
+//    will emit a conditional branch immediately after the node.
+//    So we have branches to branches and lots of ICC.ZF games.
+//    Instead, it might be better to have C2 pass a "FailureLabel"
+//    into Fast_Lock and Fast_Unlock.  In the case of success, control
+//    will drop through the node.  ICC.ZF is undefined at exit.
+//    In the case of failure, the node will branch directly to the
+//    FailureLabel
+
+
+// obj: object to lock
+// box: on-stack box address (displaced header location) - KILLED
+// rax,: tmp -- KILLED
+// scr: tmp -- KILLED
+void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
+                               Register scrReg, Register cx1Reg, Register cx2Reg,
+                               BiasedLockingCounters* counters,
+                               RTMLockingCounters* rtm_counters,
+                               RTMLockingCounters* stack_rtm_counters,
+                               Metadata* method_data,
+                               bool use_rtm, bool profile_rtm) {
+  // Ensure the register assignents are disjoint
+  assert(tmpReg == rax, "");
+
+  if (use_rtm) {
+    assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
+  } else {
+    assert(cx1Reg == noreg, "");
+    assert(cx2Reg == noreg, "");
+    assert_different_registers(objReg, boxReg, tmpReg, scrReg);
+  }
+
+  if (counters != NULL) {
+    atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
+  }
+  if (EmitSync & 1) {
+      // set box->dhw = unused_mark (3)
+      // Force all sync thru slow-path: slow_enter() and slow_exit()
+      movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
+      cmpptr (rsp, (int32_t)NULL_WORD);
+  } else
+  if (EmitSync & 2) {
+      Label DONE_LABEL ;
+      if (UseBiasedLocking) {
+         // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
+         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
+      }
+
+      movptr(tmpReg, Address(objReg, 0));           // fetch markword
+      orptr (tmpReg, 0x1);
+      movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
+      if (os::is_MP()) {
+        lock();
+      }
+      cmpxchgptr(boxReg, Address(objReg, 0));       // Updates tmpReg
+      jccb(Assembler::equal, DONE_LABEL);
+      // Recursive locking
+      subptr(tmpReg, rsp);
+      andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
+      movptr(Address(boxReg, 0), tmpReg);
+      bind(DONE_LABEL);
+  } else {
+    // Possible cases that we'll encounter in fast_lock
+    // ------------------------------------------------
+    // * Inflated
+    //    -- unlocked
+    //    -- Locked
+    //       = by self
+    //       = by other
+    // * biased
+    //    -- by Self
+    //    -- by other
+    // * neutral
+    // * stack-locked
+    //    -- by self
+    //       = sp-proximity test hits
+    //       = sp-proximity test generates false-negative
+    //    -- by other
+    //
+
+    Label IsInflated, DONE_LABEL;
+
+    // it's stack-locked, biased or neutral
+    // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
+    // order to reduce the number of conditional branches in the most common cases.
+    // Beware -- there's a subtle invariant that fetch of the markword
+    // at [FETCH], below, will never observe a biased encoding (*101b).
+    // If this invariant is not held we risk exclusion (safety) failure.
+    if (UseBiasedLocking && !UseOptoBiasInlining) {
+      biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters);
+    }
+
+#if INCLUDE_RTM_OPT
+    if (UseRTMForStackLocks && use_rtm) {
+      rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
+                        stack_rtm_counters, method_data, profile_rtm,
+                        DONE_LABEL, IsInflated);
+    }
+#endif // INCLUDE_RTM_OPT
+
+    movptr(tmpReg, Address(objReg, 0));          // [FETCH]
+    testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
+    jccb(Assembler::notZero, IsInflated);
+
+    // Attempt stack-locking ...
+    orptr (tmpReg, markOopDesc::unlocked_value);
+    movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
+    if (os::is_MP()) {
+      lock();
+    }
+    cmpxchgptr(boxReg, Address(objReg, 0));      // Updates tmpReg
+    if (counters != NULL) {
+      cond_inc32(Assembler::equal,
+                 ExternalAddress((address)counters->fast_path_entry_count_addr()));
+    }
+    jcc(Assembler::equal, DONE_LABEL);           // Success
+
+    // Recursive locking.
+    // The object is stack-locked: markword contains stack pointer to BasicLock.
+    // Locked by current thread if difference with current SP is less than one page.
+    subptr(tmpReg, rsp);
+    // Next instruction set ZFlag == 1 (Success) if difference is less then one page.
+    andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
+    movptr(Address(boxReg, 0), tmpReg);
+    if (counters != NULL) {
+      cond_inc32(Assembler::equal,
+                 ExternalAddress((address)counters->fast_path_entry_count_addr()));
+    }
+    jmp(DONE_LABEL);
+
+    bind(IsInflated);
+    // The object is inflated. tmpReg contains pointer to ObjectMonitor* + 2(monitor_value)
+
+#if INCLUDE_RTM_OPT
+    // Use the same RTM locking code in 32- and 64-bit VM.
+    if (use_rtm) {
+      rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
+                           rtm_counters, method_data, profile_rtm, DONE_LABEL);
+    } else {
+#endif // INCLUDE_RTM_OPT
+
+#ifndef _LP64
+    // The object is inflated.
+    //
+    // TODO-FIXME: eliminate the ugly use of manifest constants:
+    //   Use markOopDesc::monitor_value instead of "2".
+    //   use markOop::unused_mark() instead of "3".
+    // The tmpReg value is an objectMonitor reference ORed with
+    // markOopDesc::monitor_value (2).   We can either convert tmpReg to an
+    // objectmonitor pointer by masking off the "2" bit or we can just
+    // use tmpReg as an objectmonitor pointer but bias the objectmonitor
+    // field offsets with "-2" to compensate for and annul the low-order tag bit.
+    //
+    // I use the latter as it avoids AGI stalls.
+    // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
+    // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
+    //
+    #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
+
+    // boxReg refers to the on-stack BasicLock in the current frame.
+    // We'd like to write:
+    //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
+    // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
+    // additional latency as we have another ST in the store buffer that must drain.
+
+    if (EmitSync & 8192) {
+       movptr(Address(boxReg, 0), 3);            // results in ST-before-CAS penalty
+       get_thread (scrReg);
+       movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
+       movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
+       if (os::is_MP()) {
+         lock();
+       }
+       cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    } else
+    if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
+       movptr(scrReg, boxReg);
+       movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
+
+       // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
+       if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+          // prefetchw [eax + Offset(_owner)-2]
+          prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       }
+
+       if ((EmitSync & 64) == 0) {
+         // Optimistic form: consider XORL tmpReg,tmpReg
+         movptr(tmpReg, NULL_WORD);
+       } else {
+         // Can suffer RTS->RTO upgrades on shared or cold $ lines
+         // Test-And-CAS instead of CAS
+         movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));   // rax, = m->_owner
+         testptr(tmpReg, tmpReg);                   // Locked ?
+         jccb  (Assembler::notZero, DONE_LABEL);
+       }
+
+       // Appears unlocked - try to swing _owner from null to non-null.
+       // Ideally, I'd manifest "Self" with get_thread and then attempt
+       // to CAS the register containing Self into m->Owner.
+       // But we don't have enough registers, so instead we can either try to CAS
+       // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
+       // we later store "Self" into m->Owner.  Transiently storing a stack address
+       // (rsp or the address of the box) into  m->owner is harmless.
+       // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
+       if (os::is_MP()) {
+         lock();
+       }
+       cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
+       jccb  (Assembler::notZero, DONE_LABEL);
+       get_thread (scrReg);                    // beware: clobbers ICCs
+       movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg);
+       xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
+
+       // If the CAS fails we can either retry or pass control to the slow-path.
+       // We use the latter tactic.
+       // Pass the CAS result in the icc.ZFlag into DONE_LABEL
+       // If the CAS was successful ...
+       //   Self has acquired the lock
+       //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
+       // Intentional fall-through into DONE_LABEL ...
+    } else {
+       movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark()));  // results in ST-before-CAS penalty
+       movptr(boxReg, tmpReg);
+
+       // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
+       if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+          // prefetchw [eax + Offset(_owner)-2]
+          prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       }
+
+       if ((EmitSync & 64) == 0) {
+         // Optimistic form
+         xorptr  (tmpReg, tmpReg);
+       } else {
+         // Can suffer RTS->RTO upgrades on shared or cold $ lines
+         movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));   // rax, = m->_owner
+         testptr(tmpReg, tmpReg);                   // Locked ?
+         jccb  (Assembler::notZero, DONE_LABEL);
+       }
+
+       // Appears unlocked - try to swing _owner from null to non-null.
+       // Use either "Self" (in scr) or rsp as thread identity in _owner.
+       // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
+       get_thread (scrReg);
+       if (os::is_MP()) {
+         lock();
+       }
+       cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+
+       // If the CAS fails we can either retry or pass control to the slow-path.
+       // We use the latter tactic.
+       // Pass the CAS result in the icc.ZFlag into DONE_LABEL
+       // If the CAS was successful ...
+       //   Self has acquired the lock
+       //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
+       // Intentional fall-through into DONE_LABEL ...
+    }
+#else // _LP64
+    // It's inflated
+
+    // TODO: someday avoid the ST-before-CAS penalty by
+    // relocating (deferring) the following ST.
+    // We should also think about trying a CAS without having
+    // fetched _owner.  If the CAS is successful we may
+    // avoid an RTO->RTS upgrade on the $line.
+
+    // Without cast to int32_t a movptr will destroy r10 which is typically obj
+    movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
+
+    movptr (boxReg, tmpReg);
+    movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    testptr(tmpReg, tmpReg);
+    jccb   (Assembler::notZero, DONE_LABEL);
+
+    // It's inflated and appears unlocked
+    if (os::is_MP()) {
+      lock();
+    }
+    cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    // Intentional fall-through into DONE_LABEL ...
+#endif // _LP64
+
+#if INCLUDE_RTM_OPT
+    } // use_rtm()
+#endif
+    // DONE_LABEL is a hot target - we'd really like to place it at the
+    // start of cache line by padding with NOPs.
+    // See the AMD and Intel software optimization manuals for the
+    // most efficient "long" NOP encodings.
+    // Unfortunately none of our alignment mechanisms suffice.
+    bind(DONE_LABEL);
+
+    // At DONE_LABEL the icc ZFlag is set as follows ...
+    // Fast_Unlock uses the same protocol.
+    // ZFlag == 1 -> Success
+    // ZFlag == 0 -> Failure - force control through the slow-path
+  }
+}
+
+// obj: object to unlock
+// box: box address (displaced header location), killed.  Must be EAX.
+// tmp: killed, cannot be obj nor box.
+//
+// Some commentary on balanced locking:
+//
+// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
+// Methods that don't have provably balanced locking are forced to run in the
+// interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
+// The interpreter provides two properties:
+// I1:  At return-time the interpreter automatically and quietly unlocks any
+//      objects acquired the current activation (frame).  Recall that the
+//      interpreter maintains an on-stack list of locks currently held by
+//      a frame.
+// I2:  If a method attempts to unlock an object that is not held by the
+//      the frame the interpreter throws IMSX.
+//
+// Lets say A(), which has provably balanced locking, acquires O and then calls B().
+// B() doesn't have provably balanced locking so it runs in the interpreter.
+// Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
+// is still locked by A().
+//
+// The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
+// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
+// should not be unlocked by "normal" java-level locking and vice-versa.  The specification
+// doesn't specify what will occur if a program engages in such mixed-mode locking, however.
+
+void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
+  assert(boxReg == rax, "");
+  assert_different_registers(objReg, boxReg, tmpReg);
+
+  if (EmitSync & 4) {
+    // Disable - inhibit all inlining.  Force control through the slow-path
+    cmpptr (rsp, 0);
+  } else
+  if (EmitSync & 8) {
+    Label DONE_LABEL;
+    if (UseBiasedLocking) {
+       biased_locking_exit(objReg, tmpReg, DONE_LABEL);
+    }
+    // Classic stack-locking code ...
+    // Check whether the displaced header is 0
+    //(=> recursive unlock)
+    movptr(tmpReg, Address(boxReg, 0));
+    testptr(tmpReg, tmpReg);
+    jccb(Assembler::zero, DONE_LABEL);
+    // If not recursive lock, reset the header to displaced header
+    if (os::is_MP()) {
+      lock();
+    }
+    cmpxchgptr(tmpReg, Address(objReg, 0));   // Uses RAX which is box
+    bind(DONE_LABEL);
+  } else {
+    Label DONE_LABEL, Stacked, CheckSucc;
+
+    // Critically, the biased locking test must have precedence over
+    // and appear before the (box->dhw == 0) recursive stack-lock test.
+    if (UseBiasedLocking && !UseOptoBiasInlining) {
+       biased_locking_exit(objReg, tmpReg, DONE_LABEL);
+    }
+
+#if INCLUDE_RTM_OPT
+    if (UseRTMForStackLocks && use_rtm) {
+      assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
+      Label L_regular_unlock;
+      movptr(tmpReg, Address(objReg, 0));           // fetch markword
+      andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
+      cmpptr(tmpReg, markOopDesc::unlocked_value);            // bits = 001 unlocked
+      jccb(Assembler::notEqual, L_regular_unlock);  // if !HLE RegularLock
+      xend();                                       // otherwise end...
+      jmp(DONE_LABEL);                              // ... and we're done
+      bind(L_regular_unlock);
+    }
+#endif
+
+    cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
+    jcc   (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
+    movptr(tmpReg, Address(objReg, 0));             // Examine the object's markword
+    testptr(tmpReg, markOopDesc::monitor_value);    // Inflated?
+    jccb  (Assembler::zero, Stacked);
+
+    // It's inflated.
+#if INCLUDE_RTM_OPT
+    if (use_rtm) {
+      Label L_regular_inflated_unlock;
+      // Clean monitor_value bit to get valid pointer
+      int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
+      movptr(boxReg, Address(tmpReg, owner_offset));
+      testptr(boxReg, boxReg);
+      jccb(Assembler::notZero, L_regular_inflated_unlock);
+      xend();
+      jmpb(DONE_LABEL);
+      bind(L_regular_inflated_unlock);
+    }
+#endif
+
+    // Despite our balanced locking property we still check that m->_owner == Self
+    // as java routines or native JNI code called by this thread might
+    // have released the lock.
+    // Refer to the comments in synchronizer.cpp for how we might encode extra
+    // state in _succ so we can avoid fetching EntryList|cxq.
+    //
+    // I'd like to add more cases in fast_lock() and fast_unlock() --
+    // such as recursive enter and exit -- but we have to be wary of
+    // I$ bloat, T$ effects and BP$ effects.
+    //
+    // If there's no contention try a 1-0 exit.  That is, exit without
+    // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
+    // we detect and recover from the race that the 1-0 exit admits.
+    //
+    // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
+    // before it STs null into _owner, releasing the lock.  Updates
+    // to data protected by the critical section must be visible before
+    // we drop the lock (and thus before any other thread could acquire
+    // the lock and observe the fields protected by the lock).
+    // IA32's memory-model is SPO, so STs are ordered with respect to
+    // each other and there's no need for an explicit barrier (fence).
+    // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
+#ifndef _LP64
+    get_thread (boxReg);
+    if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+      // prefetchw [ebx + Offset(_owner)-2]
+      prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    }
+
+    // Note that we could employ various encoding schemes to reduce
+    // the number of loads below (currently 4) to just 2 or 3.
+    // Refer to the comments in synchronizer.cpp.
+    // In practice the chain of fetches doesn't seem to impact performance, however.
+    if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
+       // Attempt to reduce branch density - AMD's branch predictor.
+       xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+       orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
+       orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+       jccb  (Assembler::notZero, DONE_LABEL);
+       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+       jmpb  (DONE_LABEL);
+    } else {
+       xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+       jccb  (Assembler::notZero, DONE_LABEL);
+       movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
+       orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+       jccb  (Assembler::notZero, CheckSucc);
+       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+       jmpb  (DONE_LABEL);
+    }
+
+    // The Following code fragment (EmitSync & 65536) improves the performance of
+    // contended applications and contended synchronization microbenchmarks.
+    // Unfortunately the emission of the code - even though not executed - causes regressions
+    // in scimark and jetstream, evidently because of $ effects.  Replacing the code
+    // with an equal number of never-executed NOPs results in the same regression.
+    // We leave it off by default.
+
+    if ((EmitSync & 65536) != 0) {
+       Label LSuccess, LGoSlowPath ;
+
+       bind  (CheckSucc);
+
+       // Optional pre-test ... it's safe to elide this
+       if ((EmitSync & 16) == 0) {
+          cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+          jccb  (Assembler::zero, LGoSlowPath);
+       }
+
+       // We have a classic Dekker-style idiom:
+       //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
+       // There are a number of ways to implement the barrier:
+       // (1) lock:andl &m->_owner, 0
+       //     is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
+       //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
+       //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
+       // (2) If supported, an explicit MFENCE is appealing.
+       //     In older IA32 processors MFENCE is slower than lock:add or xchg
+       //     particularly if the write-buffer is full as might be the case if
+       //     if stores closely precede the fence or fence-equivalent instruction.
+       //     In more modern implementations MFENCE appears faster, however.
+       // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
+       //     The $lines underlying the top-of-stack should be in M-state.
+       //     The locked add instruction is serializing, of course.
+       // (4) Use xchg, which is serializing
+       //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
+       // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
+       //     The integer condition codes will tell us if succ was 0.
+       //     Since _succ and _owner should reside in the same $line and
+       //     we just stored into _owner, it's likely that the $line
+       //     remains in M-state for the lock:orl.
+       //
+       // We currently use (3), although it's likely that switching to (2)
+       // is correct for the future.
+
+       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+       if (os::is_MP()) {
+          if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
+            mfence();
+          } else {
+            lock (); addptr(Address(rsp, 0), 0);
+          }
+       }
+       // Ratify _succ remains non-null
+       cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0);
+       jccb  (Assembler::notZero, LSuccess);
+
+       xorptr(boxReg, boxReg);                  // box is really EAX
+       if (os::is_MP()) { lock(); }
+       cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       jccb  (Assembler::notEqual, LSuccess);
+       // Since we're low on registers we installed rsp as a placeholding in _owner.
+       // Now install Self over rsp.  This is safe as we're transitioning from
+       // non-null to non=null
+       get_thread (boxReg);
+       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg);
+       // Intentional fall-through into LGoSlowPath ...
+
+       bind  (LGoSlowPath);
+       orptr(boxReg, 1);                      // set ICC.ZF=0 to indicate failure
+       jmpb  (DONE_LABEL);
+
+       bind  (LSuccess);
+       xorptr(boxReg, boxReg);                 // set ICC.ZF=1 to indicate success
+       jmpb  (DONE_LABEL);
+    }
+
+    bind (Stacked);
+    // It's not inflated and it's not recursively stack-locked and it's not biased.
+    // It must be stack-locked.
+    // Try to reset the header to displaced header.
+    // The "box" value on the stack is stable, so we can reload
+    // and be assured we observe the same value as above.
+    movptr(tmpReg, Address(boxReg, 0));
+    if (os::is_MP()) {
+      lock();
+    }
+    cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
+    // Intention fall-thru into DONE_LABEL
+
+    // DONE_LABEL is a hot target - we'd really like to place it at the
+    // start of cache line by padding with NOPs.
+    // See the AMD and Intel software optimization manuals for the
+    // most efficient "long" NOP encodings.
+    // Unfortunately none of our alignment mechanisms suffice.
+    if ((EmitSync & 65536) == 0) {
+       bind (CheckSucc);
+    }
+#else // _LP64
+    // It's inflated
+    movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    xorptr(boxReg, r15_thread);
+    orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+    jccb  (Assembler::notZero, DONE_LABEL);
+    movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+    orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
+    jccb  (Assembler::notZero, CheckSucc);
+    movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
+    jmpb  (DONE_LABEL);
+
+    if ((EmitSync & 65536) == 0) {
+      Label LSuccess, LGoSlowPath ;
+      bind  (CheckSucc);
+      cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+      jccb  (Assembler::zero, LGoSlowPath);
+
+      // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
+      // the explicit ST;MEMBAR combination, but masm doesn't currently support
+      // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
+      // are all faster when the write buffer is populated.
+      movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
+      if (os::is_MP()) {
+         lock (); addl (Address(rsp, 0), 0);
+      }
+      cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+      jccb  (Assembler::notZero, LSuccess);
+
+      movptr (boxReg, (int32_t)NULL_WORD);                   // box is really EAX
+      if (os::is_MP()) { lock(); }
+      cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+      jccb  (Assembler::notEqual, LSuccess);
+      // Intentional fall-through into slow-path
+
+      bind  (LGoSlowPath);
+      orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
+      jmpb  (DONE_LABEL);
+
+      bind  (LSuccess);
+      testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
+      jmpb  (DONE_LABEL);
+    }
+
+    bind  (Stacked);
+    movptr(tmpReg, Address (boxReg, 0));      // re-fetch
+    if (os::is_MP()) { lock(); }
+    cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
+
+    if (EmitSync & 65536) {
+       bind (CheckSucc);
+    }
+#endif
+    bind(DONE_LABEL);
+    // Avoid branch to branch on AMD processors
+    if (EmitSync & 32768) {
+       nop();
+    }
+  }
+}
+#endif // COMPILER2
+
 void MacroAssembler::c2bool(Register x) {
   // implements x == 0 ? 0 : 1
   // note: must only look at least-significant byte of x
@@ -1969,7 +2852,9 @@
   Condition negated_cond = negate_condition(cond);
   Label L;
   jcc(negated_cond, L);
+  pushf(); // Preserve flags
   atomic_incl(counter_addr);
+  popf();
   bind(L);
 }
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -27,6 +27,7 @@

 #include "asm/assembler.hpp"
 #include "utilities/macros.hpp"
+#include "runtime/rtmLocking.hpp"


 // MacroAssembler extends Assembler by frequently used macros.
@@ -111,7 +112,8 @@
         op == 0xE9 /* jmp */ ||
         op == 0xEB /* short jmp */ ||
         (op & 0xF0) == 0x70 /* short jcc */ ||
-        op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */,
+        op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */ ||
+        op == 0xC7 && branch[1] == 0xF8 /* xbegin */,
         "Invalid opcode at patch point");

     if (op == 0xEB || (op & 0xF0) == 0x70) {
@@ -121,7 +123,7 @@
       guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset");
       *disp = imm8;
     } else {
-      int* disp = (int*) &branch[(op == 0x0F)? 2: 1];
+      int* disp = (int*) &branch[(op == 0x0F || op == 0xC7)? 2: 1];
       int imm32 = target - (address) &disp[1];
       *disp = imm32;
     }
@@ -161,7 +163,6 @@
   void incrementq(Register reg, int value = 1);
   void incrementq(Address dst, int value = 1);

-
   // Support optimal SSE move instructions.
   void movflt(XMMRegister dst, XMMRegister src) {
     if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
@@ -187,6 +188,8 @@
   void incrementl(AddressLiteral dst);
   void incrementl(ArrayAddress dst);

+  void incrementq(AddressLiteral dst);
+
   // Alignment
   void align(int modulus);

@@ -651,7 +654,40 @@
                            Label& done, Label* slow_case = NULL,
                            BiasedLockingCounters* counters = NULL);
   void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
-
+#ifdef COMPILER2
+  // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
+  // See full desription in macroAssembler_x86.cpp.
+  void fast_lock(Register obj, Register box, Register tmp,
+                 Register scr, Register cx1, Register cx2,
+                 BiasedLockingCounters* counters,
+                 RTMLockingCounters* rtm_counters,
+                 RTMLockingCounters* stack_rtm_counters,
+                 Metadata* method_data,
+                 bool use_rtm, bool profile_rtm);
+  void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm);
+#if INCLUDE_RTM_OPT
+  void rtm_counters_update(Register abort_status, Register rtm_counters);
+  void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel);
+  void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg,
+                                   RTMLockingCounters* rtm_counters,
+                                   Metadata* method_data);
+  void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg,
+                     RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
+  void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel);
+  void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel);
+  void rtm_stack_locking(Register obj, Register tmp, Register scr,
+                         Register retry_on_abort_count,
+                         RTMLockingCounters* stack_rtm_counters,
+                         Metadata* method_data, bool profile_rtm,
+                         Label& DONE_LABEL, Label& IsInflated);
+  void rtm_inflated_locking(Register obj, Register box, Register tmp,
+                            Register scr, Register retry_on_busy_count,
+                            Register retry_on_abort_count,
+                            RTMLockingCounters* rtm_counters,
+                            Metadata* method_data, bool profile_rtm,
+                            Label& DONE_LABEL);
+#endif
+#endif

   Condition negate_condition(Condition cond);

@@ -716,6 +752,7 @@


   void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
+  void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32)) NOT_LP64(imull(dst, src, imm32)); }


   void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }
@@ -757,7 +794,14 @@
   // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes.
   void cond_inc32(Condition cond, AddressLiteral counter_addr);
   // Unconditional atomic increment.
-  void atomic_incl(AddressLiteral counter_addr);
+  void atomic_incl(Address counter_addr);
+  void atomic_incl(AddressLiteral counter_addr, Register scr = rscratch1);
+#ifdef _LP64
+  void atomic_incq(Address counter_addr);
+  void atomic_incq(AddressLiteral counter_addr, Register scr = rscratch1);
+#endif
+  void atomic_incptr(AddressLiteral counter_addr, Register scr = rscratch1) { LP64_ONLY(atomic_incq(counter_addr, scr)) NOT_LP64(atomic_incl(counter_addr, scr)) ; }
+  void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr)) NOT_LP64(atomic_incl(counter_addr)) ; }

   void lea(Register dst, AddressLiteral adr);
   void lea(Address dst, AddressLiteral adr);
@@ -1069,7 +1113,11 @@

   void movptr(Register dst, Address src);

-  void movptr(Register dst, AddressLiteral src);
+#ifdef _LP64
+  void movptr(Register dst, AddressLiteral src, Register scratch=rscratch1);
+#else
+  void movptr(Register dst, AddressLiteral src, Register scratch=noreg); // Scratch reg is ignored in 32-bit
+#endif

   void movptr(Register dst, intptr_t src);
   void movptr(Register dst, Register src);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/x86/vm/rtmLocking.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "memory/allocation.inline.hpp"
+#include "runtime/task.hpp"
+#include "runtime/rtmLocking.hpp"
+
+// One-shot PeriodicTask subclass for enabling RTM locking
+uintx RTMLockingCounters::_calculation_flag = 0;
+
+class RTMLockingCalculationTask : public PeriodicTask {
+ public:
+  RTMLockingCalculationTask(size_t interval_time) : PeriodicTask(interval_time){  }
+
+  virtual void task() {
+    RTMLockingCounters::_calculation_flag = 1;
+    // Reclaim our storage and disenroll ourself
+    delete this;
+  }
+};
+
+void RTMLockingCounters::init() {
+  if (UseRTMLocking && RTMLockingCalculationDelay > 0) {
+    RTMLockingCalculationTask* task = new RTMLockingCalculationTask(RTMLockingCalculationDelay);
+    task->enroll();
+  } else {
+    _calculation_flag = 1;
+  }
+}
+
+//------------------------------print_on-------------------------------
+void RTMLockingCounters::print_on(outputStream* st) {
+  tty->print_cr("# rtm locks total (estimated): " UINTX_FORMAT, _total_count * RTMTotalCountIncrRate);
+  tty->print_cr("# rtm lock aborts  : " UINTX_FORMAT, _abort_count);
+  for (int i = 0; i < ABORT_STATUS_LIMIT; i++) {
+    tty->print_cr("# rtm lock aborts %d: " UINTX_FORMAT, i, _abortX_count[i]);
+  }
+}
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1817,6 +1817,13 @@
   // Frame is now completed as far as size and linkage.
   int frame_complete = ((intptr_t)__ pc()) - start;

+  if (UseRTMLocking) {
+    // Abort RTM transaction before calling JNI
+    // because critical section will be large and will be
+    // aborted anyway. Also nmethod could be deoptimized.
+    __ xabort(0);
+  }
+
   // Calculate the difference between rsp and rbp,. We need to know it
   // after the native call because on windows Java Natives will pop
   // the arguments and it is painful to do rsp relative addressing
@@ -3170,6 +3177,12 @@
   };

   address start = __ pc();
+
+  if (UseRTMLocking) {
+    // Abort RTM transaction before possible nmethod deoptimization.
+    __ xabort(0);
+  }
+
   // Push self-frame.
   __ subptr(rsp, return_off*wordSize);     // Epilog!

@@ -3355,6 +3368,14 @@
   address call_pc = NULL;
   bool cause_return = (poll_type == POLL_AT_RETURN);
   bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
+
+  if (UseRTMLocking) {
+    // Abort RTM transaction before calling runtime
+    // because critical section will be large and will be
+    // aborted anyway. Also nmethod could be deoptimized.
+    __ xabort(0);
+  }
+
   // If cause_return is true we are at a poll_return and there is
   // the return address on the stack to the caller on the nmethod
   // that is safepoint. We can leave this return on the stack and
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -2012,6 +2012,13 @@
   // Frame is now completed as far as size and linkage.
   int frame_complete = ((intptr_t)__ pc()) - start;

+    if (UseRTMLocking) {
+      // Abort RTM transaction before calling JNI
+      // because critical section will be large and will be
+      // aborted anyway. Also nmethod could be deoptimized.
+      __ xabort(0);
+    }
+
 #ifdef ASSERT
     {
       Label L;
@@ -3612,6 +3619,11 @@

   address start = __ pc();

+  if (UseRTMLocking) {
+    // Abort RTM transaction before possible nmethod deoptimization.
+    __ xabort(0);
+  }
+
   // Push self-frame.  We get here with a return address on the
   // stack, so rsp is 8-byte aligned until we allocate our frame.
   __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog!
@@ -3792,6 +3804,13 @@
   bool cause_return = (poll_type == POLL_AT_RETURN);
   bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);

+  if (UseRTMLocking) {
+    // Abort RTM transaction before calling runtime
+    // because critical section will be large and will be
+    // aborted anyway. Also nmethod could be deoptimized.
+    __ xabort(0);
+  }
+
   // Make room for return address (or push it again)
   if (!cause_return) {
     __ push(rbx);
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -50,8 +50,13 @@
 const char*           VM_Version::_features_str = "";
 VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, };

+// Address of instruction which causes SEGV
+address VM_Version::_cpuinfo_segv_addr = 0;
+// Address of instruction after the one which causes SEGV
+address VM_Version::_cpuinfo_cont_addr = 0;
+
 static BufferBlob* stub_blob;
-static const int stub_size = 550;
+static const int stub_size = 600;

 extern "C" {
   typedef void (*getPsrInfo_stub_t)(void*);
@@ -234,9 +239,9 @@
     // Check if OS has enabled XGETBV instruction to access XCR0
     // (OSXSAVE feature flag) and CPU supports AVX
     //
-    __ andl(rcx, 0x18000000);
+    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
     __ cmpl(rcx, 0x18000000);
-    __ jccb(Assembler::notEqual, sef_cpuid);
+    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

     //
     // XCR0, XFEATURE_ENABLED_MASK register
@@ -247,6 +252,47 @@
     __ movl(Address(rsi, 0), rax);
     __ movl(Address(rsi, 4), rdx);

+    __ andl(rax, 0x6); // xcr0 bits sse | ymm
+    __ cmpl(rax, 0x6);
+    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
+
+    //
+    // Some OSs have a bug when upper 128bits of YMM
+    // registers are not restored after a signal processing.
+    // Generate SEGV here (reference through NULL)
+    // and check upper YMM bits after it.
+    //
+    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
+
+    // load value into all 32 bytes of ymm7 register
+    __ movl(rcx, VM_Version::ymm_test_value());
+
+    __ movdl(xmm0, rcx);
+    __ pshufd(xmm0, xmm0, 0x00);
+    __ vinsertf128h(xmm0, xmm0, xmm0);
+    __ vmovdqu(xmm7, xmm0);
+#ifdef _LP64
+    __ vmovdqu(xmm8,  xmm0);
+    __ vmovdqu(xmm15, xmm0);
+#endif
+
+    __ xorl(rsi, rsi);
+    VM_Version::set_cpuinfo_segv_addr( __ pc() );
+    // Generate SEGV
+    __ movl(rax, Address(rsi, 0));
+
+    VM_Version::set_cpuinfo_cont_addr( __ pc() );
+    // Returns here after signal. Save xmm0 to check it later.
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
+    __ vmovdqu(Address(rsi,  0), xmm0);
+    __ vmovdqu(Address(rsi, 32), xmm7);
+#ifdef _LP64
+    __ vmovdqu(Address(rsi, 64), xmm8);
+    __ vmovdqu(Address(rsi, 96), xmm15);
+#endif
+
+    VM_Version::clean_cpuFeatures();
+
     //
     // cpuid(0x7) Structured Extended Features
     //
@@ -429,7 +475,7 @@
   }

   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -446,8 +492,9 @@
                (supports_avx()    ? ", avx" : ""),
                (supports_avx2()   ? ", avx2" : ""),
                (supports_aes()    ? ", aes" : ""),
-               (supports_clmul()    ? ", clmul" : ""),
+               (supports_clmul()  ? ", clmul" : ""),
                (supports_erms()   ? ", erms" : ""),
+               (supports_rtm()    ? ", rtm" : ""),
                (supports_mmx_ext() ? ", mmxext" : ""),
                (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
                (supports_lzcnt()   ? ", lzcnt": ""),
@@ -455,7 +502,9 @@
                (supports_ht() ? ", ht": ""),
                (supports_tsc() ? ", tsc": ""),
                (supports_tscinv_bit() ? ", tscinvbit": ""),
-               (supports_tscinv() ? ", tscinv": ""));
+               (supports_tscinv() ? ", tscinv": ""),
+               (supports_bmi1() ? ", bmi1" : ""),
+               (supports_bmi2() ? ", bmi2" : ""));
   _features_str = strdup(buf);

   // UseSSE is set to the smaller of what hardware supports and what
@@ -486,7 +535,7 @@
     }
   } else if (UseAES) {
     if (!FLAG_IS_DEFAULT(UseAES))
-      warning("AES instructions not available on this CPU");
+      warning("AES instructions are not available on this CPU");
     FLAG_SET_DEFAULT(UseAES, false);
   }

@@ -519,10 +568,57 @@
     }
   } else if (UseAESIntrinsics) {
     if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
-      warning("AES intrinsics not available on this CPU");
+      warning("AES intrinsics are not available on this CPU");
     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
   }

+  // Adjust RTM (Restricted Transactional Memory) flags
+  if (!supports_rtm() && UseRTMLocking) {
+    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
+    // setting during arguments processing. See use_biased_locking().
+    // VM_Version_init() is executed after UseBiasedLocking is used
+    // in Thread::allocate().
+    vm_exit_during_initialization("RTM instructions are not available on this CPU");
+  }
+
+#if INCLUDE_RTM_OPT
+  if (UseRTMLocking) {
+    if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
+      // RTM locking should be used only for applications with
+      // high lock contention. For now we do not use it by default.
+      vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
+    }
+    if (!is_power_of_2(RTMTotalCountIncrRate)) {
+      warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
+      FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
+    }
+    if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
+      warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
+      FLAG_SET_DEFAULT(RTMAbortRatio, 50);
+    }
+  } else { // !UseRTMLocking
+    if (UseRTMForStackLocks) {
+      if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
+        warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
+      }
+      FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
+    }
+    if (UseRTMDeopt) {
+      FLAG_SET_DEFAULT(UseRTMDeopt, false);
+    }
+    if (PrintPreciseRTMLockingStatistics) {
+      FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
+    }
+  }
+#else
+  if (UseRTMLocking) {
+    // Only C2 does RTM locking optimization.
+    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
+    // setting during arguments processing. See use_biased_locking().
+    vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
+  }
+#endif
+
 #ifdef COMPILER2
   if (UseFPUForSpilling) {
     if (UseSSE < 2) {
@@ -538,14 +634,28 @@
     if (MaxVectorSize > 32) {
       FLAG_SET_DEFAULT(MaxVectorSize, 32);
     }
-    if (MaxVectorSize > 16 && UseAVX == 0) {
-      // Only supported with AVX+
+    if (MaxVectorSize > 16 && (UseAVX == 0 || !os_supports_avx_vectors())) {
+      // 32 bytes vectors (in YMM) are only supported with AVX+
       FLAG_SET_DEFAULT(MaxVectorSize, 16);
     }
     if (UseSSE < 2) {
-      // Only supported with SSE2+
+      // Vectors (in XMM) are only supported with SSE2+
       FLAG_SET_DEFAULT(MaxVectorSize, 0);
     }
+#ifdef ASSERT
+    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
+      tty->print_cr("State of YMM registers after signal handle:");
+      int nreg = 2 LP64_ONLY(+2);
+      const char* ymm_name[4] = {"0", "7", "8", "15"};
+      for (int i = 0; i < nreg; i++) {
+        tty->print("YMM%s:", ymm_name[i]);
+        for (int j = 7; j >=0; j--) {
+          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
+        }
+        tty->cr();
+      }
+    }
+#endif
   }
 #endif

@@ -600,13 +710,6 @@
       }
     }

-    // Use count leading zeros count instruction if available.
-    if (supports_lzcnt()) {
-      if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
-        UseCountLeadingZerosInstruction = true;
-      }
-    }
-
     // some defaults for AMD family 15h
     if ( cpu_family() == 0x15 ) {
       // On family 15h processors default is no sw prefetch
@@ -683,14 +786,35 @@
       }
     }
   }
-#if defined(COMPILER2) && defined(_ALLBSD_SOURCE)
-    if (MaxVectorSize > 16) {
-      // Limit vectors size to 16 bytes on BSD until it fixes
-      // restoring upper 128bit of YMM registers on return
-      // from signal handler.
-      FLAG_SET_DEFAULT(MaxVectorSize, 16);
+
+  // Use count leading zeros count instruction if available.
+  if (supports_lzcnt()) {
+    if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
+      UseCountLeadingZerosInstruction = true;
+    }
+   } else if (UseCountLeadingZerosInstruction) {
+    warning("lzcnt instruction is not available on this CPU");
+    FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
+  }
+
+  if (supports_bmi1()) {
+    if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
+      UseBMI1Instructions = true;
     }
-#endif // COMPILER2
+  } else if (UseBMI1Instructions) {
+    warning("BMI1 instructions are not available on this CPU");
+    FLAG_SET_DEFAULT(UseBMI1Instructions, false);
+  }
+
+  // Use count trailing zeros instruction if available
+  if (supports_bmi1()) {
+    if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
+      UseCountTrailingZerosInstruction = UseBMI1Instructions;
+    }
+  } else if (UseCountTrailingZerosInstruction) {
+    warning("tzcnt instruction is not available on this CPU");
+    FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
+  }

   // Use population count instruction if available.
   if (supports_popcnt()) {
@@ -790,6 +914,11 @@
     if (UseAES) {
       tty->print("  UseAES=1");
     }
+#ifdef COMPILER2
+    if (MaxVectorSize > 0) {
+      tty->print("  MaxVectorSize=%d", MaxVectorSize);
+    }
+#endif
     tty->cr();
     tty->print("Allocation");
     if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
@@ -832,6 +961,27 @@
 #endif // !PRODUCT
 }

+bool VM_Version::use_biased_locking() {
+#if INCLUDE_RTM_OPT
+  // RTM locking is most useful when there is high lock contention and
+  // low data contention.  With high lock contention the lock is usually
+  // inflated and biased locking is not suitable for that case.
+  // RTM locking code requires that biased locking is off.
+  // Note: we can't switch off UseBiasedLocking in get_processor_features()
+  // because it is used by Thread::allocate() which is called before
+  // VM_Version::initialize().
+  if (UseRTMLocking && UseBiasedLocking) {
+    if (FLAG_IS_DEFAULT(UseBiasedLocking)) {
+      FLAG_SET_DEFAULT(UseBiasedLocking, false);
+    } else {
+      warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." );
+      UseBiasedLocking = false;
+    }
+  }
+#endif
+  return UseBiasedLocking;
+}
+
 void VM_Version::initialize() {
   ResourceMark rm;
   // Making this stub must be FIRST use of assembler
--- a/src/cpu/x86/vm/vm_version_x86.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -141,7 +141,8 @@
     struct {
       uint32_t LahfSahf     : 1,
                CmpLegacy    : 1,
-                            : 4,
+                            : 3,
+               lzcnt_intel  : 1,
                lzcnt        : 1,
                sse4a        : 1,
                misalignsse  : 1,
@@ -206,7 +207,9 @@
                         : 2,
                    bmi2 : 1,
                    erms : 1,
-                        : 22;
+                        : 1,
+                   rtm  : 1,
+                        : 20;
     } bits;
   };

@@ -228,6 +231,9 @@
                                // 0 if this instruction is not available
   static const char* _features_str;

+  static address   _cpuinfo_segv_addr; // address of instruction which causes SEGV
+  static address   _cpuinfo_cont_addr; // address of instruction after the one which causes SEGV
+
   enum {
     CPU_CX8    = (1 << 0), // next bits are from cpuid 1 (EDX)
     CPU_CMOV   = (1 << 1),
@@ -251,7 +257,10 @@
     CPU_AVX2   = (1 << 18),
     CPU_AES    = (1 << 19),
     CPU_ERMS   = (1 << 20), // enhanced 'rep movsb/stosb' instructions
-    CPU_CLMUL  = (1 << 21) // carryless multiply for CRC
+    CPU_CLMUL  = (1 << 21), // carryless multiply for CRC
+    CPU_BMI1   = (1 << 22),
+    CPU_BMI2   = (1 << 23),
+    CPU_RTM    = (1 << 24)  // Restricted Transactional Memory instructions
   } cpuFeatureFlags;

   enum {
@@ -358,6 +367,9 @@
     // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
     XemXcr0Eax   xem_xcr0_eax;
     uint32_t     xem_xcr0_edx; // reserved
+
+    // Space to save ymm registers after signal handle
+    int          ymm_save[8*4]; // Save ymm0, ymm7, ymm8, ymm15
   };

   // The actual cpuid info block
@@ -423,6 +435,8 @@
       if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
         result |= CPU_AVX2;
     }
+    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
+      result |= CPU_BMI1;
     if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
       result |= CPU_TSC;
     if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
@@ -433,6 +447,8 @@
       result |= CPU_ERMS;
     if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
       result |= CPU_CLMUL;
+    if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
+      result |= CPU_RTM;

     // AMD features.
     if (is_amd()) {
@@ -444,10 +460,32 @@
       if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
         result |= CPU_SSE4A;
     }
+    // Intel features.
+    if(is_intel()) {
+      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
+        result |= CPU_BMI2;
+      if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
+        result |= CPU_LZCNT;
+    }

     return result;
   }

+  static bool os_supports_avx_vectors() {
+    if (!supports_avx()) {
+      return false;
+    }
+    // Verify that OS save/restore all bits of AVX registers
+    // during signal processing.
+    int nreg = 2 LP64_ONLY(+2);
+    for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
+      if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   static void get_processor_features();

 public:
@@ -464,10 +502,26 @@
   static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
   static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
   static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
+  static ByteSize ymm_save_offset() { return byte_offset_of(CpuidInfo, ymm_save); }
+
+  // The value used to check ymm register after signal handle
+  static int ymm_test_value()    { return 0xCAFEBABE; }
+
+  static void set_cpuinfo_segv_addr(address pc) { _cpuinfo_segv_addr = pc; }
+  static bool  is_cpuinfo_segv_addr(address pc) { return _cpuinfo_segv_addr == pc; }
+  static void set_cpuinfo_cont_addr(address pc) { _cpuinfo_cont_addr = pc; }
+  static address  cpuinfo_cont_addr()           { return _cpuinfo_cont_addr; }
+
+  static void clean_cpuFeatures()   { _cpuFeatures = 0; }
+  static void set_avx_cpuFeatures() { _cpuFeatures = (CPU_SSE | CPU_SSE2 | CPU_AVX); }
+

   // Initialization
   static void initialize();

+  // Override Abstract_VM_Version implementation
+  static bool use_biased_locking();
+
   // Asserts
   static void assert_is_initialized() {
     assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
@@ -560,7 +614,9 @@
   static bool supports_aes()      { return (_cpuFeatures & CPU_AES) != 0; }
   static bool supports_erms()     { return (_cpuFeatures & CPU_ERMS) != 0; }
   static bool supports_clmul()    { return (_cpuFeatures & CPU_CLMUL) != 0; }
-
+  static bool supports_rtm()      { return (_cpuFeatures & CPU_RTM) != 0; }
+  static bool supports_bmi1()     { return (_cpuFeatures & CPU_BMI1) != 0; }
+  static bool supports_bmi2()     { return (_cpuFeatures & CPU_BMI2) != 0; }
   // Intel features
   static bool is_intel_family_core() { return is_intel() &&
                                        extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
--- a/src/cpu/x86/vm/x86_32.ad	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/cpu/x86/vm/x86_32.ad	Tue Mar 25 17:07:36 2014 -0700
@@ -1489,19 +1489,6 @@
   return EBP_REG_mask();
 }

-const RegMask Matcher::mathExactI_result_proj_mask() {
-  return EAX_REG_mask();
-}
-
-const RegMask Matcher::mathExactL_result_proj_mask() {
-  ShouldNotReachHere();
-  return RegMask();
-}
-
-const RegMask Matcher::mathExactI_flags_proj_mask() {
-  return INT_FLAGS_mask();
-}
-
 // Returns true if the high 32 bits of the value is known to be zero.
 bool is_operand_hi32_zero(Node* n) {
   int opc = n->Opcode();
@@ -2865,542 +2852,6 @@
     emit_d8    (cbuf,0 );
   %}

-
-  // Because the transitions from emitted code to the runtime
-  // monitorenter/exit helper stubs are so slow it's critical that
-  // we inline both the stack-locking fast-path and the inflated fast path.
-  //
-  // See also: cmpFastLock and cmpFastUnlock.
-  //
-  // What follows is a specialized inline transliteration of the code
-  // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
-  // another option would be to emit TrySlowEnter and TrySlowExit methods
-  // at startup-time.  These methods would accept arguments as
-  // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
-  // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
-  // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
-  // In practice, however, the # of lock sites is bounded and is usually small.
-  // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
-  // if the processor uses simple bimodal branch predictors keyed by EIP
-  // Since the helper routines would be called from multiple synchronization
-  // sites.
-  //
-  // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
-  // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
-  // to those specialized methods.  That'd give us a mostly platform-independent
-  // implementation that the JITs could optimize and inline at their pleasure.
-  // Done correctly, the only time we'd need to cross to native could would be
-  // to park() or unpark() threads.  We'd also need a few more unsafe operators
-  // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
-  // (b) explicit barriers or fence operations.
-  //
-  // TODO:
-  //
-  // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
-  //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
-  //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
-  //    the lock operators would typically be faster than reifying Self.
-  //
-  // *  Ideally I'd define the primitives as:
-  //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
-  //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
-  //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
-  //    Instead, we're stuck with a rather awkward and brittle register assignments below.
-  //    Furthermore the register assignments are overconstrained, possibly resulting in
-  //    sub-optimal code near the synchronization site.
-  //
-  // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
-  //    Alternately, use a better sp-proximity test.
-  //
-  // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
-  //    Either one is sufficient to uniquely identify a thread.
-  //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
-  //
-  // *  Intrinsify notify() and notifyAll() for the common cases where the
-  //    object is locked by the calling thread but the waitlist is empty.
-  //    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
-  //
-  // *  use jccb and jmpb instead of jcc and jmp to improve code density.
-  //    But beware of excessive branch density on AMD Opterons.
-  //
-  // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
-  //    or failure of the fast-path.  If the fast-path fails then we pass
-  //    control to the slow-path, typically in C.  In Fast_Lock and
-  //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
-  //    will emit a conditional branch immediately after the node.
-  //    So we have branches to branches and lots of ICC.ZF games.
-  //    Instead, it might be better to have C2 pass a "FailureLabel"
-  //    into Fast_Lock and Fast_Unlock.  In the case of success, control
-  //    will drop through the node.  ICC.ZF is undefined at exit.
-  //    In the case of failure, the node will branch directly to the
-  //    FailureLabel
-
-
-  // obj: object to lock
-  // box: on-stack box address (displaced header location) - KILLED
-  // rax,: tmp -- KILLED
-  // scr: tmp -- KILLED
-  enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
-
-    Register objReg = as_Register($obj$$reg);
-    Register boxReg = as_Register($box$$reg);
-    Register tmpReg = as_Register($tmp$$reg);
-    Register scrReg = as_Register($scr$$reg);
-
-    // Ensure the register assignents are disjoint
-    guarantee (objReg != boxReg, "") ;
-    guarantee (objReg != tmpReg, "") ;
-    guarantee (objReg != scrReg, "") ;
-    guarantee (boxReg != tmpReg, "") ;
-    guarantee (boxReg != scrReg, "") ;
-    guarantee (tmpReg == as_Register(EAX_enc), "") ;
-
-    MacroAssembler masm(&cbuf);
-
-    if (_counters != NULL) {
-      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
-    }
-    if (EmitSync & 1) {
-        // set box->dhw = unused_mark (3)
-        // Force all sync thru slow-path: slow_enter() and slow_exit()
-        masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
-        masm.cmpptr (rsp, (int32_t)0) ;
-    } else
-    if (EmitSync & 2) {
-        Label DONE_LABEL ;
-        if (UseBiasedLocking) {
-           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
-           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
-        }
-
-        masm.movptr(tmpReg, Address(objReg, 0)) ;          // fetch markword
-        masm.orptr (tmpReg, 0x1);
-        masm.movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
-        if (os::is_MP()) { masm.lock();  }
-        masm.cmpxchgptr(boxReg, Address(objReg, 0));          // Updates tmpReg
-        masm.jcc(Assembler::equal, DONE_LABEL);
-        // Recursive locking
-        masm.subptr(tmpReg, rsp);
-        masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
-        masm.movptr(Address(boxReg, 0), tmpReg);
-        masm.bind(DONE_LABEL) ;
-    } else {
-      // Possible cases that we'll encounter in fast_lock
-      // ------------------------------------------------
-      // * Inflated
-      //    -- unlocked
-      //    -- Locked
-      //       = by self
-      //       = by other
-      // * biased
-      //    -- by Self
-      //    -- by other
-      // * neutral
-      // * stack-locked
-      //    -- by self
-      //       = sp-proximity test hits
-      //       = sp-proximity test generates false-negative
-      //    -- by other
-      //
-
-      Label IsInflated, DONE_LABEL, PopDone ;
-
-      // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
-      // order to reduce the number of conditional branches in the most common cases.
-      // Beware -- there's a subtle invariant that fetch of the markword
-      // at [FETCH], below, will never observe a biased encoding (*101b).
-      // If this invariant is not held we risk exclusion (safety) failure.
-      if (UseBiasedLocking && !UseOptoBiasInlining) {
-        masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
-      }
-
-      masm.movptr(tmpReg, Address(objReg, 0)) ;         // [FETCH]
-      masm.testptr(tmpReg, 0x02) ;                      // Inflated v (Stack-locked or neutral)
-      masm.jccb  (Assembler::notZero, IsInflated) ;
-
-      // Attempt stack-locking ...
-      masm.orptr (tmpReg, 0x1);
-      masm.movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
-      if (os::is_MP()) { masm.lock();  }
-      masm.cmpxchgptr(boxReg, Address(objReg, 0));           // Updates tmpReg
-      if (_counters != NULL) {
-        masm.cond_inc32(Assembler::equal,
-                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
-      }
-      masm.jccb (Assembler::equal, DONE_LABEL);
-
-      // Recursive locking
-      masm.subptr(tmpReg, rsp);
-      masm.andptr(tmpReg, 0xFFFFF003 );
-      masm.movptr(Address(boxReg, 0), tmpReg);
-      if (_counters != NULL) {
-        masm.cond_inc32(Assembler::equal,
-                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
-      }
-      masm.jmp  (DONE_LABEL) ;
-
-      masm.bind (IsInflated) ;
-
-      // The object is inflated.
-      //
-      // TODO-FIXME: eliminate the ugly use of manifest constants:
-      //   Use markOopDesc::monitor_value instead of "2".
-      //   use markOop::unused_mark() instead of "3".
-      // The tmpReg value is an objectMonitor reference ORed with
-      // markOopDesc::monitor_value (2).   We can either convert tmpReg to an
-      // objectmonitor pointer by masking off the "2" bit or we can just
-      // use tmpReg as an objectmonitor pointer but bias the objectmonitor
-      // field offsets with "-2" to compensate for and annul the low-order tag bit.
-      //
-      // I use the latter as it avoids AGI stalls.
-      // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
-      // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
-      //
-      #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
-
-      // boxReg refers to the on-stack BasicLock in the current frame.
-      // We'd like to write:
-      //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
-      // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
-      // additional latency as we have another ST in the store buffer that must drain.
-
-      if (EmitSync & 8192) {
-         masm.movptr(Address(boxReg, 0), 3) ;            // results in ST-before-CAS penalty
-         masm.get_thread (scrReg) ;
-         masm.movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
-         masm.movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
-         if (os::is_MP()) { masm.lock(); }
-         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-      } else
-      if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
-         masm.movptr(scrReg, boxReg) ;
-         masm.movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
-
-         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
-         if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
-            // prefetchw [eax + Offset(_owner)-2]
-            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
-         }
-
-         if ((EmitSync & 64) == 0) {
-           // Optimistic form: consider XORL tmpReg,tmpReg
-           masm.movptr(tmpReg, NULL_WORD) ;
-         } else {
-           // Can suffer RTS->RTO upgrades on shared or cold $ lines
-           // Test-And-CAS instead of CAS
-           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
-           masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
-           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
-         }
-
-         // Appears unlocked - try to swing _owner from null to non-null.
-         // Ideally, I'd manifest "Self" with get_thread and then attempt
-         // to CAS the register containing Self into m->Owner.
-         // But we don't have enough registers, so instead we can either try to CAS
-         // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
-         // we later store "Self" into m->Owner.  Transiently storing a stack address
-         // (rsp or the address of the box) into  m->owner is harmless.
-         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
-         if (os::is_MP()) { masm.lock();  }
-         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-         masm.movptr(Address(scrReg, 0), 3) ;          // box->_displaced_header = 3
-         masm.jccb  (Assembler::notZero, DONE_LABEL) ;
-         masm.get_thread (scrReg) ;                    // beware: clobbers ICCs
-         masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
-         masm.xorptr(boxReg, boxReg) ;                 // set icc.ZFlag = 1 to indicate success
-
-         // If the CAS fails we can either retry or pass control to the slow-path.
-         // We use the latter tactic.
-         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
-         // If the CAS was successful ...
-         //   Self has acquired the lock
-         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
-         // Intentional fall-through into DONE_LABEL ...
-      } else {
-         masm.movptr(Address(boxReg, 0), 3) ;       // results in ST-before-CAS penalty
-         masm.movptr(boxReg, tmpReg) ;
-
-         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
-         if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
-            // prefetchw [eax + Offset(_owner)-2]
-            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
-         }
-
-         if ((EmitSync & 64) == 0) {
-           // Optimistic form
-           masm.xorptr  (tmpReg, tmpReg) ;
-         } else {
-           // Can suffer RTS->RTO upgrades on shared or cold $ lines
-           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
-           masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
-           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
-         }
-
-         // Appears unlocked - try to swing _owner from null to non-null.
-         // Use either "Self" (in scr) or rsp as thread identity in _owner.
-         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
-         masm.get_thread (scrReg) ;
-         if (os::is_MP()) { masm.lock(); }
-         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-
-         // If the CAS fails we can either retry or pass control to the slow-path.
-         // We use the latter tactic.
-         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
-         // If the CAS was successful ...
-         //   Self has acquired the lock
-         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
-         // Intentional fall-through into DONE_LABEL ...
-      }
-
-      // DONE_LABEL is a hot target - we'd really like to place it at the
-      // start of cache line by padding with NOPs.
-      // See the AMD and Intel software optimization manuals for the
-      // most efficient "long" NOP encodings.
-      // Unfortunately none of our alignment mechanisms suffice.
-      masm.bind(DONE_LABEL);
-
-      // Avoid branch-to-branch on AMD processors
-      // This appears to be superstition.
-      if (EmitSync & 32) masm.nop() ;
-
-
-      // At DONE_LABEL the icc ZFlag is set as follows ...
-      // Fast_Unlock uses the same protocol.
-      // ZFlag == 1 -> Success
-      // ZFlag == 0 -> Failure - force control through the slow-path
-    }
-  %}
-
-  // obj: object to unlock
-  // box: box address (displaced header location), killed.  Must be EAX.
-  // rbx,: killed tmp; cannot be obj nor box.
-  //
-  // Some commentary on balanced locking:
-  //
-  // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
-  // Methods that don't have provably balanced locking are forced to run in the
-  // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
-  // The interpreter provides two properties:
-  // I1:  At return-time the interpreter automatically and quietly unlocks any
-  //      objects acquired the current activation (frame).  Recall that the
-  //      interpreter maintains an on-stack list of locks currently held by
-  //      a frame.
-  // I2:  If a method attempts to unlock an object that is not held by the
-  //      the frame the interpreter throws IMSX.
-  //
-  // Lets say A(), which has provably balanced locking, acquires O and then calls B().
-  // B() doesn't have provably balanced locking so it runs in the interpreter.
-  // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
-  // is still locked by A().
-  //
-  // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
-  // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
-  // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
-  // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
-
-  enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
-
-    Register objReg = as_Register($obj$$reg);
-    Register boxReg = as_Register($box$$reg);
-    Register tmpReg = as_Register($tmp$$reg);
-
-    guarantee (objReg != boxReg, "") ;
-    guarantee (objReg != tmpReg, "") ;
-    guarantee (boxReg != tmpReg, "") ;
-    guarantee (boxReg == as_Register(EAX_enc), "") ;
-    MacroAssembler masm(&cbuf);
-
-    if (EmitSync & 4) {
-      // Disable - inhibit all inlining.  Force control through the slow-path
-      masm.cmpptr (rsp, 0) ;
-    } else
-    if (EmitSync & 8) {
-      Label DONE_LABEL ;
-      if (UseBiasedLocking) {
-         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
-      }
-      // classic stack-locking code ...
-      masm.movptr(tmpReg, Address(boxReg, 0)) ;
-      masm.testptr(tmpReg, tmpReg) ;
-      masm.jcc   (Assembler::zero, DONE_LABEL) ;
-      if (os::is_MP()) { masm.lock(); }
-      masm.cmpxchgptr(tmpReg, Address(objReg, 0));          // Uses EAX which is box
-      masm.bind(DONE_LABEL);
-    } else {
-      Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
-
-      // Critically, the biased locking test must have precedence over
-      // and appear before the (box->dhw == 0) recursive stack-lock test.
-      if (UseBiasedLocking && !UseOptoBiasInlining) {
-         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
-      }
-
-      masm.cmpptr(Address(boxReg, 0), 0) ;            // Examine the displaced header
-      masm.movptr(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
-      masm.jccb  (Assembler::zero, DONE_LABEL) ;      // 0 indicates recursive stack-lock
-
-      masm.testptr(tmpReg, 0x02) ;                     // Inflated?
-      masm.jccb  (Assembler::zero, Stacked) ;
-
-      masm.bind  (Inflated) ;
-      // It's inflated.
-      // Despite our balanced locking property we still check that m->_owner == Self
-      // as java routines or native JNI code called by this thread might
-      // have released the lock.
-      // Refer to the comments in synchronizer.cpp for how we might encode extra
-      // state in _succ so we can avoid fetching EntryList|cxq.
-      //
-      // I'd like to add more cases in fast_lock() and fast_unlock() --
-      // such as recursive enter and exit -- but we have to be wary of
-      // I$ bloat, T$ effects and BP$ effects.
-      //
-      // If there's no contention try a 1-0 exit.  That is, exit without
-      // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
-      // we detect and recover from the race that the 1-0 exit admits.
-      //
-      // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
-      // before it STs null into _owner, releasing the lock.  Updates
-      // to data protected by the critical section must be visible before
-      // we drop the lock (and thus before any other thread could acquire
-      // the lock and observe the fields protected by the lock).
-      // IA32's memory-model is SPO, so STs are ordered with respect to
-      // each other and there's no need for an explicit barrier (fence).
-      // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
-
-      masm.get_thread (boxReg) ;
-      if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
-        // prefetchw [ebx + Offset(_owner)-2]
-        masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
-      }
-
-      // Note that we could employ various encoding schemes to reduce
-      // the number of loads below (currently 4) to just 2 or 3.
-      // Refer to the comments in synchronizer.cpp.
-      // In practice the chain of fetches doesn't seem to impact performance, however.
-      if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
-         // Attempt to reduce branch density - AMD's branch predictor.
-         masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
-         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
-         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
-         masm.jccb  (Assembler::notZero, DONE_LABEL) ;
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
-         masm.jmpb  (DONE_LABEL) ;
-      } else {
-         masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
-         masm.jccb  (Assembler::notZero, DONE_LABEL) ;
-         masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
-         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
-         masm.jccb  (Assembler::notZero, CheckSucc) ;
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
-         masm.jmpb  (DONE_LABEL) ;
-      }
-
-      // The Following code fragment (EmitSync & 65536) improves the performance of
-      // contended applications and contended synchronization microbenchmarks.
-      // Unfortunately the emission of the code - even though not executed - causes regressions
-      // in scimark and jetstream, evidently because of $ effects.  Replacing the code
-      // with an equal number of never-executed NOPs results in the same regression.
-      // We leave it off by default.
-
-      if ((EmitSync & 65536) != 0) {
-         Label LSuccess, LGoSlowPath ;
-
-         masm.bind  (CheckSucc) ;
-
-         // Optional pre-test ... it's safe to elide this
-         if ((EmitSync & 16) == 0) {
-            masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
-            masm.jccb  (Assembler::zero, LGoSlowPath) ;
-         }
-
-         // We have a classic Dekker-style idiom:
-         //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
-         // There are a number of ways to implement the barrier:
-         // (1) lock:andl &m->_owner, 0
-         //     is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
-         //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
-         //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
-         // (2) If supported, an explicit MFENCE is appealing.
-         //     In older IA32 processors MFENCE is slower than lock:add or xchg
-         //     particularly if the write-buffer is full as might be the case if
-         //     if stores closely precede the fence or fence-equivalent instruction.
-         //     In more modern implementations MFENCE appears faster, however.
-         // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
-         //     The $lines underlying the top-of-stack should be in M-state.
-         //     The locked add instruction is serializing, of course.
-         // (4) Use xchg, which is serializing
-         //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
-         // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
-         //     The integer condition codes will tell us if succ was 0.
-         //     Since _succ and _owner should reside in the same $line and
-         //     we just stored into _owner, it's likely that the $line
-         //     remains in M-state for the lock:orl.
-         //
-         // We currently use (3), although it's likely that switching to (2)
-         // is correct for the future.
-
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
-         if (os::is_MP()) {
-            if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
-              masm.mfence();
-            } else {
-              masm.lock () ; masm.addptr(Address(rsp, 0), 0) ;
-            }
-         }
-         // Ratify _succ remains non-null
-         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
-         masm.jccb  (Assembler::notZero, LSuccess) ;
-
-         masm.xorptr(boxReg, boxReg) ;                  // box is really EAX
-         if (os::is_MP()) { masm.lock(); }
-         masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
-         masm.jccb  (Assembler::notEqual, LSuccess) ;
-         // Since we're low on registers we installed rsp as a placeholding in _owner.
-         // Now install Self over rsp.  This is safe as we're transitioning from
-         // non-null to non=null
-         masm.get_thread (boxReg) ;
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
-         // Intentional fall-through into LGoSlowPath ...
-
-         masm.bind  (LGoSlowPath) ;
-         masm.orptr(boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
-         masm.jmpb  (DONE_LABEL) ;
-
-         masm.bind  (LSuccess) ;
-         masm.xorptr(boxReg, boxReg) ;                 // set ICC.ZF=1 to indicate success
-         masm.jmpb  (DONE_LABEL) ;
-      }
-
-      masm.bind (Stacked) ;
-      // It's not inflated and it's not recursively stack-locked and it's not biased.
-      // It must be stack-locked.
-      // Try to reset the header to displaced header.
-      // The "box" value on the stack is stable, so we can reload
-      // and be assured we observe the same value as above.
-      masm.movptr(tmpReg, Address(boxReg, 0)) ;
-      if (os::is_MP()) {   masm.lock();    }
-      masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
-      // Intention fall-thru into DONE_LABEL
-
-
-      // DONE_LABEL is a hot target - we'd really like to place it at the
-      // start of cache line by padding with NOPs.
-      // See the AMD and Intel software optimization manuals for the
-      // most efficient "long" NOP encodings.
-      // Unfortunately none of our alignment mechanisms suffice.
-      if ((EmitSync & 65536) == 0) {
-         masm.bind (CheckSucc) ;
-      }
-      masm.bind(DONE_LABEL);
-
-      // Avoid branch to branch on AMD processors
-      if (EmitSync & 32768) { masm.nop() ; }
-    }
-  %}
-
-
   enc_class enc_pop_rdx() %{
     emit_opcode(cbuf,0x5A);
   %}
@@ -5659,6 +5110,19 @@
 %}

 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
+  predicate(UseCountTrailingZerosInstruction);
+  match(Set dst (CountTrailingZerosI src));
+  effect(KILL cr);
+
+  format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
+  ins_encode %{
+    __ tzcntl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
+  predicate(!UseCountTrailingZerosInstruction);
   match(Set dst (CountTrailingZerosI src));
   effect(KILL cr);

@@ -5678,6 +5142,30 @@
 %}

 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
+  predicate(UseCountTrailingZerosInstruction);
+  match(Set dst (CountTrailingZerosL src));
+  effect(TEMP dst, KILL cr);
+
+  format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
+            "JNC    done\n\t"
+            "TZCNT  $dst, $src.hi\n\t"
+            "ADD    $dst, 32\n"
+            "done:" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc = $src$$Register;
+    Label done;
+    __ tzcntl(Rdst, Rsrc);
+    __ jccb(Assembler::carryClear, done);
+    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
+    __ addl(Rdst, BitsPerInt);
+    __ bind(done);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
+  predicate(!UseCountTrailingZerosInstruction);
   match(Set dst (CountTrailingZerosL src));
   effect(TEMP dst, KILL cr);

@@ -7492,44 +6980,6 @@
 //----------Arithmetic Instructions--------------------------------------------
 //----------Addition Instructions----------------------------------------------

-instruct addExactI_eReg(eAXRegI dst, rRegI src, eFlagsReg cr)
-%{
-  match(AddExactI dst src);
-  effect(DEF cr);
-
-  format %{ "ADD    $dst, $src\t# addExact int" %}
-  ins_encode %{
-    __ addl($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct addExactI_eReg_imm(eAXRegI dst, immI src, eFlagsReg cr)
-%{
-  match(AddExactI dst src);
-  effect(DEF cr);
-
-  format %{ "ADD    $dst, $src\t# addExact int" %}
-  ins_encode %{
-    __ addl($dst$$Register, $src$$constant);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct addExactI_eReg_mem(eAXRegI dst, memory src, eFlagsReg cr)
-%{
-  match(AddExactI dst (LoadI src));
-  effect(DEF cr);
-
-  ins_cost(125);
-  format %{ "ADD    $dst,$src\t# addExact int" %}
-  ins_encode %{
-    __ addl($dst$$Register, $src$$Address);
-  %}
-  ins_pipe( ialu_reg_mem );
-%}
-
-
 // Integer Addition Instructions
 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (AddI dst src));
@@ -7839,43 +7289,6 @@

 //----------Subtraction Instructions-------------------------------------------

-instruct subExactI_eReg(eAXRegI dst, rRegI src, eFlagsReg cr)
-%{
-  match(SubExactI dst src);
-  effect(DEF cr);
-
-  format %{ "SUB    $dst, $src\t# subExact int" %}
-  ins_encode %{
-    __ subl($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct subExactI_eReg_imm(eAXRegI dst, immI src, eFlagsReg cr)
-%{
-  match(SubExactI dst src);
-  effect(DEF cr);
-
-  format %{ "SUB    $dst, $src\t# subExact int" %}
-  ins_encode %{
-    __ subl($dst$$Register, $src$$constant);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct subExactI_eReg_mem(eAXRegI dst, memory src, eFlagsReg cr)
-%{
-  match(SubExactI dst (LoadI src));
-  effect(DEF cr);
-
-  ins_cost(125);
-  format %{ "SUB    $dst,$src\t# subExact int" %}
-  ins_encode %{
-    __ subl($dst$$Register, $src$$Address);
-  %}
-  ins_pipe( ialu_reg_mem );
-%}
-
 // Integer Subtraction Instructions
 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (SubI dst src));
@@ -7944,17 +7357,6 @@
   ins_pipe( ialu_reg );
 %}

-instruct negExactI_eReg(eAXRegI dst, eFlagsReg cr) %{
-  match(NegExactI dst);
-  effect(DEF cr);
-
-  format %{ "NEG    $dst\t# negExact int"%}
-  ins_encode %{
-    __ negl($dst$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
 //----------Multiplication/Division Instructions-------------------------------
 // Integer Multiplication Instructions
 // Multiply Register
@@ -8166,46 +7568,6 @@
   ins_pipe( pipe_slow );
 %}

-instruct mulExactI_eReg(eAXRegI dst, rRegI src, eFlagsReg cr)
-%{
-  match(MulExactI dst src);
-  effect(DEF cr);
-
-  ins_cost(300);
-  format %{ "IMUL   $dst, $src\t# mulExact int" %}
-  ins_encode %{
-    __ imull($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg_alu0);
-%}
-
-instruct mulExactI_eReg_imm(eAXRegI dst, rRegI src, immI imm, eFlagsReg cr)
-%{
-  match(MulExactI src imm);
-  effect(DEF cr);
-
-  ins_cost(300);
-  format %{ "IMUL   $dst, $src, $imm\t# mulExact int" %}
-  ins_encode %{
-    __ imull($dst$$Register, $src$$Register, $imm$$constant);
-  %}
-  ins_pipe(ialu_reg_reg_alu0);
-%}
-
-instruct mulExactI_eReg_mem(eAXRegI dst, memory src, eFlagsReg cr)
-%{
-  match(MulExactI dst (LoadI src));
-  effect(DEF cr);
-
-  ins_cost(350);
-  format %{ "IMUL   $dst, $src\t# mulExact int" %}
-  ins_encode %{
-    __ imull($dst$$Register, $src$$Address);
-  %}
-  ins_pipe(ialu_reg_mem_alu0);
-%}
-
-
 // Integer DIV with Register
 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
   match(Set rax (DivI rax div));
@@ -8649,6 +8011,123 @@
   ins_pipe( ialu_mem_imm );
 %}

+// BMI1 instructions
+instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
+  match(Set dst (AndI (XorI src1 minus_1) src2));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "ANDNL  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
+  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "ANDNL  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
+  match(Set dst (AndI (SubI imm_zero src) src));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "BLSIL  $dst, $src" %}
+
+  ins_encode %{
+    __ blsil($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
+  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "BLSIL  $dst, $src" %}
+
+  ins_encode %{
+    __ blsil($dst$$Register, $src$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
+%{
+  match(Set dst (XorI (AddI src minus_1) src));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "BLSMSKL $dst, $src" %}
+
+  ins_encode %{
+    __ blsmskl($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
+%{
+  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "BLSMSKL $dst, $src" %}
+
+  ins_encode %{
+    __ blsmskl($dst$$Register, $src$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
+%{
+  match(Set dst (AndI (AddI src minus_1) src) );
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "BLSRL  $dst, $src" %}
+
+  ins_encode %{
+    __ blsrl($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
+%{
+  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "BLSRL  $dst, $src" %}
+
+  ins_encode %{
+    __ blsrl($dst$$Register, $src$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
 // Or Instructions
 // Or Register with Register
 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
@@ -9071,6 +8550,91 @@
 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 */
+//----------Overflow Math Instructions-----------------------------------------
+
+instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
+%{
+  match(Set cr (OverflowAddI op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "ADD    $op1, $op2\t# overflow check int" %}
+
+  ins_encode %{
+    __ addl($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
+%{
+  match(Set cr (OverflowAddI op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "ADD    $op1, $op2\t# overflow check int" %}
+
+  ins_encode %{
+    __ addl($op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
+%{
+  match(Set cr (OverflowSubI op1 op2));
+
+  format %{ "CMP    $op1, $op2\t# overflow check int" %}
+  ins_encode %{
+    __ cmpl($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
+%{
+  match(Set cr (OverflowSubI op1 op2));
+
+  format %{ "CMP    $op1, $op2\t# overflow check int" %}
+  ins_encode %{
+    __ cmpl($op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
+%{
+  match(Set cr (OverflowSubI zero op2));
+  effect(DEF cr, USE_KILL op2);
+
+  format %{ "NEG    $op2\t# overflow check int" %}
+  ins_encode %{
+    __ negl($op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
+%{
+  match(Set cr (OverflowMulI op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "IMUL    $op1, $op2\t# overflow check int" %}
+  ins_encode %{
+    __ imull($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg_alu0);
+%}
+
+instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
+%{
+  match(Set cr (OverflowMulI op1 op2));
+  effect(DEF cr, TEMP tmp, USE op1, USE op2);
+
+  format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
+  ins_encode %{
+    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg_alu0);
+%}

 //----------Long Instructions------------------------------------------------
 // Add Long Register with Register
@@ -9186,6 +8750,210 @@
   ins_pipe( ialu_reg_long_mem );
 %}

+// BMI1 instructions
+instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
+  match(Set dst (AndL (XorL src1 minus_1) src2));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr, TEMP dst);
+
+  format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
+            "ANDNL  $dst.hi, $src1.hi, $src2.hi"
+         %}
+
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    Register Rsrc2 = $src2$$Register;
+    __ andnl(Rdst, Rsrc1, Rsrc2);
+    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
+  %}
+  ins_pipe(ialu_reg_reg_long);
+%}
+
+instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
+  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr, TEMP dst);
+
+  ins_cost(125);
+  format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
+            "ANDNL  $dst.hi, $src1.hi, $src2+4"
+         %}
+
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
+
+    __ andnl(Rdst, Rsrc1, $src2$$Address);
+    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
+  match(Set dst (AndL (SubL imm_zero src) src));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr, TEMP dst);
+
+  format %{ "MOVL   $dst.hi, 0\n\t"
+            "BLSIL  $dst.lo, $src.lo\n\t"
+            "JNZ    done\n\t"
+            "BLSIL  $dst.hi, $src.hi\n"
+            "done:"
+         %}
+
+  ins_encode %{
+    Label done;
+    Register Rdst = $dst$$Register;
+    Register Rsrc = $src$$Register;
+    __ movl(HIGH_FROM_LOW(Rdst), 0);
+    __ blsil(Rdst, Rsrc);
+    __ jccb(Assembler::notZero, done);
+    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
+    __ bind(done);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
+  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr, TEMP dst);
+
+  ins_cost(125);
+  format %{ "MOVL   $dst.hi, 0\n\t"
+            "BLSIL  $dst.lo, $src\n\t"
+            "JNZ    done\n\t"
+            "BLSIL  $dst.hi, $src+4\n"
+            "done:"
+         %}
+
+  ins_encode %{
+    Label done;
+    Register Rdst = $dst$$Register;
+    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
+
+    __ movl(HIGH_FROM_LOW(Rdst), 0);
+    __ blsil(Rdst, $src$$Address);
+    __ jccb(Assembler::notZero, done);
+    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
+    __ bind(done);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
+%{
+  match(Set dst (XorL (AddL src minus_1) src));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr, TEMP dst);
+
+  format %{ "MOVL    $dst.hi, 0\n\t"
+            "BLSMSKL $dst.lo, $src.lo\n\t"
+            "JNC     done\n\t"
+            "BLSMSKL $dst.hi, $src.hi\n"
+            "done:"
+         %}
+
+  ins_encode %{
+    Label done;
+    Register Rdst = $dst$$Register;
+    Register Rsrc = $src$$Register;
+    __ movl(HIGH_FROM_LOW(Rdst), 0);
+    __ blsmskl(Rdst, Rsrc);
+    __ jccb(Assembler::carryClear, done);
+    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
+    __ bind(done);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
+%{
+  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr, TEMP dst);
+
+  ins_cost(125);
+  format %{ "MOVL    $dst.hi, 0\n\t"
+            "BLSMSKL $dst.lo, $src\n\t"
+            "JNC     done\n\t"
+            "BLSMSKL $dst.hi, $src+4\n"
+            "done:"
+         %}
+
+  ins_encode %{
+    Label done;
+    Register Rdst = $dst$$Register;
+    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
+
+    __ movl(HIGH_FROM_LOW(Rdst), 0);
+    __ blsmskl(Rdst, $src$$Address);
+    __ jccb(Assembler::carryClear, done);
+    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
+    __ bind(done);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
+%{
+  match(Set dst (AndL (AddL src minus_1) src) );
+  predicate(UseBMI1Instructions);
+  effect(KILL cr, TEMP dst);
+
+  format %{ "MOVL   $dst.hi, $src.hi\n\t"
+            "BLSRL  $dst.lo, $src.lo\n\t"
+            "JNC    done\n\t"
+            "BLSRL  $dst.hi, $src.hi\n"
+            "done:"
+  %}
+
+  ins_encode %{
+    Label done;
+    Register Rdst = $dst$$Register;
+    Register Rsrc = $src$$Register;
+    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
+    __ blsrl(Rdst, Rsrc);
+    __ jccb(Assembler::carryClear, done);
+    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
+    __ bind(done);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
+%{
+  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr, TEMP dst);
+
+  ins_cost(125);
+  format %{ "MOVL   $dst.hi, $src+4\n\t"
+            "BLSRL  $dst.lo, $src\n\t"
+            "JNC    done\n\t"
+            "BLSRL  $dst.hi, $src+4\n"
+            "done:"
+  %}
+
+  ins_encode %{
+    Label done;
+    Register Rdst = $dst$$Register;
+    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
+    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
+    __ blsrl(Rdst, $src$$Address);
+    __ jccb(Assembler::carryClear, done);
+    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
+    __ bind(done);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
 // Or Long Register with Register
 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
   match(Set dst (OrL dst src));
@@ -13104,23 +12872,44 @@

 // inlined locking and unlocking

-
-instruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
-  match( Set cr (FastLock object box) );
-  effect( TEMP tmp, TEMP scr, USE_KILL box );
+instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
+  predicate(Compile::current()->use_rtm());
+  match(Set cr (FastLock object box));
+  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
+  ins_cost(300);
+  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
+  ins_encode %{
+    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
+                 $scr$$Register, $cx1$$Register, $cx2$$Register,
+                 _counters, _rtm_counters, _stack_rtm_counters,
+                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
+                 true, ra_->C->profile_rtm());
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
+  predicate(!Compile::current()->use_rtm());
+  match(Set cr (FastLock object box));
+  effect(TEMP tmp, TEMP scr, USE_KILL box);
   ins_cost(300);
   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
-  ins_encode( Fast_Lock(object,box,tmp,scr) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
-  match( Set cr (FastUnlock object box) );
-  effect( TEMP tmp, USE_KILL box );
+  ins_encode %{
+    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
+                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
+  match(Set cr (FastUnlock object box));
+  effect(TEMP tmp, USE_KILL box);
   ins_cost(300);
   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
-  ins_encode( Fast_Unlock(object,box,tmp) );
-  ins_pipe( pipe_slow );
+  ins_encode %{
+    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
+  %}
+  ins_pipe(pipe_slow);
 %}
--- a/src/cpu/x86/vm/x86_64.ad	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Tue Mar 25 17:07:36 2014 -0700
@@ -1600,18 +1600,6 @@
   return PTR_RBP_REG_mask();
 }

-const RegMask Matcher::mathExactI_result_proj_mask() {
-  return INT_RAX_REG_mask();
-}
-
-const RegMask Matcher::mathExactL_result_proj_mask() {
-  return LONG_RAX_REG_mask();
-}
-
-const RegMask Matcher::mathExactI_flags_proj_mask() {
-  return INT_FLAGS_mask();
-}
-
 %}

 //----------ENCODING BLOCK-----------------------------------------------------
@@ -2542,231 +2530,6 @@
   %}


-  // obj: object to lock
-  // box: box address (header location) -- killed
-  // tmp: rax -- killed
-  // scr: rbx -- killed
-  //
-  // What follows is a direct transliteration of fast_lock() and fast_unlock()
-  // from i486.ad.  See that file for comments.
-  // TODO: where possible switch from movq (r, 0) to movl(r,0) and
-  // use the shorter encoding.  (Movl clears the high-order 32-bits).
-
-
-  enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
-  %{
-    Register objReg = as_Register((int)$obj$$reg);
-    Register boxReg = as_Register((int)$box$$reg);
-    Register tmpReg = as_Register($tmp$$reg);
-    Register scrReg = as_Register($scr$$reg);
-    MacroAssembler masm(&cbuf);
-
-    // Verify uniqueness of register assignments -- necessary but not sufficient
-    assert (objReg != boxReg && objReg != tmpReg &&
-            objReg != scrReg && tmpReg != scrReg, "invariant") ;
-
-    if (_counters != NULL) {
-      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
-    }
-    if (EmitSync & 1) {
-        // Without cast to int32_t a movptr will destroy r10 which is typically obj
-        masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
-        masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
-    } else
-    if (EmitSync & 2) {
-        Label DONE_LABEL;
-        if (UseBiasedLocking) {
-           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
-          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
-        }
-        // QQQ was movl...
-        masm.movptr(tmpReg, 0x1);
-        masm.orptr(tmpReg, Address(objReg, 0));
-        masm.movptr(Address(boxReg, 0), tmpReg);
-        if (os::is_MP()) {
-          masm.lock();
-        }
-        masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
-        masm.jcc(Assembler::equal, DONE_LABEL);
-
-        // Recursive locking
-        masm.subptr(tmpReg, rsp);
-        masm.andptr(tmpReg, 7 - os::vm_page_size());
-        masm.movptr(Address(boxReg, 0), tmpReg);
-
-        masm.bind(DONE_LABEL);
-        masm.nop(); // avoid branch to branch
-    } else {
-        Label DONE_LABEL, IsInflated, Egress;
-
-        masm.movptr(tmpReg, Address(objReg, 0)) ;
-        masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
-        masm.jcc   (Assembler::notZero, IsInflated) ;
-
-        // it's stack-locked, biased or neutral
-        // TODO: optimize markword triage order to reduce the number of
-        // conditional branches in the most common cases.
-        // Beware -- there's a subtle invariant that fetch of the markword
-        // at [FETCH], below, will never observe a biased encoding (*101b).
-        // If this invariant is not held we'll suffer exclusion (safety) failure.
-
-        if (UseBiasedLocking && !UseOptoBiasInlining) {
-          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
-          masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
-        }
-
-        // was q will it destroy high?
-        masm.orl   (tmpReg, 1) ;
-        masm.movptr(Address(boxReg, 0), tmpReg) ;
-        if (os::is_MP()) { masm.lock(); }
-        masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
-        if (_counters != NULL) {
-           masm.cond_inc32(Assembler::equal,
-                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
-        }
-        masm.jcc   (Assembler::equal, DONE_LABEL);
-
-        // Recursive locking
-        masm.subptr(tmpReg, rsp);
-        masm.andptr(tmpReg, 7 - os::vm_page_size());
-        masm.movptr(Address(boxReg, 0), tmpReg);
-        if (_counters != NULL) {
-           masm.cond_inc32(Assembler::equal,
-                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
-        }
-        masm.jmp   (DONE_LABEL) ;
-
-        masm.bind  (IsInflated) ;
-        // It's inflated
-
-        // TODO: someday avoid the ST-before-CAS penalty by
-        // relocating (deferring) the following ST.
-        // We should also think about trying a CAS without having
-        // fetched _owner.  If the CAS is successful we may
-        // avoid an RTO->RTS upgrade on the $line.
-        // Without cast to int32_t a movptr will destroy r10 which is typically obj
-        masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
-
-        masm.mov    (boxReg, tmpReg) ;
-        masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-        masm.testptr(tmpReg, tmpReg) ;
-        masm.jcc    (Assembler::notZero, DONE_LABEL) ;
-
-        // It's inflated and appears unlocked
-        if (os::is_MP()) { masm.lock(); }
-        masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-        // Intentional fall-through into DONE_LABEL ...
-
-        masm.bind  (DONE_LABEL) ;
-        masm.nop   () ;                 // avoid jmp to jmp
-    }
-  %}
-
-  // obj: object to unlock
-  // box: box address (displaced header location), killed
-  // RBX: killed tmp; cannot be obj nor box
-  enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
-  %{
-
-    Register objReg = as_Register($obj$$reg);
-    Register boxReg = as_Register($box$$reg);
-    Register tmpReg = as_Register($tmp$$reg);
-    MacroAssembler masm(&cbuf);
-
-    if (EmitSync & 4) {
-       masm.cmpptr(rsp, 0) ;
-    } else
-    if (EmitSync & 8) {
-       Label DONE_LABEL;
-       if (UseBiasedLocking) {
-         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
-       }
-
-       // Check whether the displaced header is 0
-       //(=> recursive unlock)
-       masm.movptr(tmpReg, Address(boxReg, 0));
-       masm.testptr(tmpReg, tmpReg);
-       masm.jcc(Assembler::zero, DONE_LABEL);
-
-       // If not recursive lock, reset the header to displaced header
-       if (os::is_MP()) {
-         masm.lock();
-       }
-       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
-       masm.bind(DONE_LABEL);
-       masm.nop(); // avoid branch to branch
-    } else {
-       Label DONE_LABEL, Stacked, CheckSucc ;
-
-       if (UseBiasedLocking && !UseOptoBiasInlining) {
-         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
-       }
-
-       masm.movptr(tmpReg, Address(objReg, 0)) ;
-       masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
-       masm.jcc   (Assembler::zero, DONE_LABEL) ;
-       masm.testl (tmpReg, 0x02) ;
-       masm.jcc   (Assembler::zero, Stacked) ;
-
-       // It's inflated
-       masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-       masm.xorptr(boxReg, r15_thread) ;
-       masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
-       masm.jcc   (Assembler::notZero, DONE_LABEL) ;
-       masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
-       masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
-       masm.jcc   (Assembler::notZero, CheckSucc) ;
-       masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
-       masm.jmp   (DONE_LABEL) ;
-
-       if ((EmitSync & 65536) == 0) {
-         Label LSuccess, LGoSlowPath ;
-         masm.bind  (CheckSucc) ;
-         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
-         masm.jcc   (Assembler::zero, LGoSlowPath) ;
-
-         // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
-         // the explicit ST;MEMBAR combination, but masm doesn't currently support
-         // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
-         // are all faster when the write buffer is populated.
-         masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
-         if (os::is_MP()) {
-            masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
-         }
-         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
-         masm.jcc   (Assembler::notZero, LSuccess) ;
-
-         masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
-         if (os::is_MP()) { masm.lock(); }
-         masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
-         masm.jcc   (Assembler::notEqual, LSuccess) ;
-         // Intentional fall-through into slow-path
-
-         masm.bind  (LGoSlowPath) ;
-         masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
-         masm.jmp   (DONE_LABEL) ;
-
-         masm.bind  (LSuccess) ;
-         masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
-         masm.jmp   (DONE_LABEL) ;
-       }
-
-       masm.bind  (Stacked) ;
-       masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
-       if (os::is_MP()) { masm.lock(); }
-       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
-
-       if (EmitSync & 65536) {
-          masm.bind (CheckSucc) ;
-       }
-       masm.bind(DONE_LABEL);
-       if (EmitSync & 32768) {
-          masm.nop();                      // avoid branch to branch
-       }
-    }
-  %}
-
-
   enc_class enc_rethrow()
   %{
     cbuf.set_insts_mark();
@@ -6202,6 +5965,19 @@
 %}

 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
+  predicate(UseCountTrailingZerosInstruction);
+  match(Set dst (CountTrailingZerosI src));
+  effect(KILL cr);
+
+  format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
+  ins_encode %{
+    __ tzcntl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
+  predicate(!UseCountTrailingZerosInstruction);
   match(Set dst (CountTrailingZerosI src));
   effect(KILL cr);

@@ -6221,6 +5997,19 @@
 %}

 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
+  predicate(UseCountTrailingZerosInstruction);
+  match(Set dst (CountTrailingZerosL src));
+  effect(KILL cr);
+
+  format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
+  ins_encode %{
+    __ tzcntq($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
+  predicate(!UseCountTrailingZerosInstruction);
   match(Set dst (CountTrailingZerosL src));
   effect(KILL cr);

@@ -6906,82 +6695,6 @@
 //----------Arithmetic Instructions--------------------------------------------
 //----------Addition Instructions----------------------------------------------

-instruct addExactI_rReg(rax_RegI dst, rRegI src, rFlagsReg cr)
-%{
-  match(AddExactI dst src);
-  effect(DEF cr);
-
-  format %{ "addl    $dst, $src\t# addExact int" %}
-  ins_encode %{
-    __ addl($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct addExactI_rReg_imm(rax_RegI dst, immI src, rFlagsReg cr)
-%{
-  match(AddExactI dst src);
-  effect(DEF cr);
-
-  format %{ "addl    $dst, $src\t# addExact int" %}
-  ins_encode %{
-    __ addl($dst$$Register, $src$$constant);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct addExactI_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr)
-%{
-  match(AddExactI dst (LoadI src));
-  effect(DEF cr);
-
-  ins_cost(125); // XXX
-  format %{ "addl    $dst, $src\t# addExact int" %}
-  ins_encode %{
-    __ addl($dst$$Register, $src$$Address);
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-instruct addExactL_rReg(rax_RegL dst, rRegL src, rFlagsReg cr)
-%{
-  match(AddExactL dst src);
-  effect(DEF cr);
-
-  format %{ "addq    $dst, $src\t# addExact long" %}
-  ins_encode %{
-    __ addq($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct addExactL_rReg_imm(rax_RegL dst, immL32 src, rFlagsReg cr)
-%{
-  match(AddExactL dst src);
-  effect(DEF cr);
-
-  format %{ "addq    $dst, $src\t# addExact long" %}
-  ins_encode %{
-    __ addq($dst$$Register, $src$$constant);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct addExactL_rReg_mem(rax_RegL dst, memory src, rFlagsReg cr)
-%{
-  match(AddExactL dst (LoadL src));
-  effect(DEF cr);
-
-  ins_cost(125); // XXX
-  format %{ "addq    $dst, $src\t# addExact long" %}
-  ins_encode %{
-    __ addq($dst$$Register, $src$$Address);
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 %{
   match(Set dst (AddI dst src));
@@ -7594,80 +7307,6 @@
   ins_pipe(ialu_mem_imm);
 %}

-instruct subExactI_rReg(rax_RegI dst, rRegI src, rFlagsReg cr)
-%{
-  match(SubExactI dst src);
-  effect(DEF cr);
-
-  format %{ "subl    $dst, $src\t# subExact int" %}
-  ins_encode %{
-    __ subl($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct subExactI_rReg_imm(rax_RegI dst, immI src, rFlagsReg cr)
-%{
-  match(SubExactI dst src);
-  effect(DEF cr);
-
-  format %{ "subl    $dst, $src\t# subExact int" %}
-  ins_encode %{
-    __ subl($dst$$Register, $src$$constant);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct subExactI_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr)
-%{
-  match(SubExactI dst (LoadI src));
-  effect(DEF cr);
-
-  ins_cost(125);
-  format %{ "subl    $dst, $src\t# subExact int" %}
-  ins_encode %{
-    __ subl($dst$$Register, $src$$Address);
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-instruct subExactL_rReg(rax_RegL dst, rRegL src, rFlagsReg cr)
-%{
-  match(SubExactL dst src);
-  effect(DEF cr);
-
-  format %{ "subq    $dst, $src\t# subExact long" %}
-  ins_encode %{
-    __ subq($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct subExactL_rReg_imm(rax_RegL dst, immL32 src, rFlagsReg cr)
-%{
-  match(SubExactL dst (LoadL src));
-  effect(DEF cr);
-
-  format %{ "subq    $dst, $src\t# subExact long" %}
-  ins_encode %{
-    __ subq($dst$$Register, $src$$constant);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct subExactL_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr)
-%{
-  match(SubExactI dst src);
-  effect(DEF cr);
-
-  ins_cost(125);
-  format %{ "subq    $dst, $src\t# subExact long" %}
-  ins_encode %{
-    __ subq($dst$$Register, $src$$Address);
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 %{
   match(Set dst (SubL dst src));
@@ -7784,31 +7423,6 @@
   ins_pipe(ialu_reg);
 %}

-instruct negExactI_rReg(rax_RegI dst, rFlagsReg cr)
-%{
-  match(NegExactI dst);
-  effect(KILL cr);
-
-  format %{ "negl    $dst\t# negExact int" %}
-  ins_encode %{
-    __ negl($dst$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct negExactL_rReg(rax_RegL dst, rFlagsReg cr)
-%{
-  match(NegExactL dst);
-  effect(KILL cr);
-
-  format %{ "negq    $dst\t# negExact long" %}
-  ins_encode %{
-    __ negq($dst$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-
 //----------Multiplication/Division Instructions-------------------------------
 // Integer Multiplication Instructions
 // Multiply Register
@@ -7925,86 +7539,6 @@
   ins_pipe(ialu_reg_reg_alu0);
 %}

-
-instruct mulExactI_rReg(rax_RegI dst, rRegI src, rFlagsReg cr)
-%{
-  match(MulExactI dst src);
-  effect(DEF cr);
-
-  ins_cost(300);
-  format %{ "imull   $dst, $src\t# mulExact int" %}
-  ins_encode %{
-    __ imull($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg_alu0);
-%}
-
-
-instruct mulExactI_rReg_imm(rax_RegI dst, rRegI src, immI imm, rFlagsReg cr)
-%{
-  match(MulExactI src imm);
-  effect(DEF cr);
-
-  ins_cost(300);
-  format %{ "imull   $dst, $src, $imm\t# mulExact int" %}
-  ins_encode %{
-    __ imull($dst$$Register, $src$$Register, $imm$$constant);
-  %}
-  ins_pipe(ialu_reg_reg_alu0);
-%}
-
-instruct mulExactI_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr)
-%{
-  match(MulExactI dst (LoadI src));
-  effect(DEF cr);
-
-  ins_cost(350);
-  format %{ "imull   $dst, $src\t# mulExact int" %}
-  ins_encode %{
-    __ imull($dst$$Register, $src$$Address);
-  %}
-  ins_pipe(ialu_reg_mem_alu0);
-%}
-
-instruct mulExactL_rReg(rax_RegL dst, rRegL src, rFlagsReg cr)
-%{
-  match(MulExactL dst src);
-  effect(DEF cr);
-
-  ins_cost(300);
-  format %{ "imulq   $dst, $src\t# mulExact long" %}
-  ins_encode %{
-    __ imulq($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg_alu0);
-%}
-
-instruct mulExactL_rReg_imm(rax_RegL dst, rRegL src, immL32 imm, rFlagsReg cr)
-%{
-  match(MulExactL src imm);
-  effect(DEF cr);
-
-  ins_cost(300);
-  format %{ "imulq   $dst, $src, $imm\t# mulExact long" %}
-  ins_encode %{
-    __ imulq($dst$$Register, $src$$Register, $imm$$constant);
-  %}
-  ins_pipe(ialu_reg_reg_alu0);
-%}
-
-instruct mulExactL_rReg_mem(rax_RegL dst, memory src, rFlagsReg cr)
-%{
-  match(MulExactL dst (LoadL src));
-  effect(DEF cr);
-
-  ins_cost(350);
-  format %{ "imulq   $dst, $src\t# mulExact long" %}
-  ins_encode %{
-    __ imulq($dst$$Register, $src$$Address);
-  %}
-  ins_pipe(ialu_reg_mem_alu0);
-%}
-
 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                    rFlagsReg cr)
 %{
@@ -9057,6 +8591,122 @@
   ins_pipe(ialu_mem_imm);
 %}

+// BMI1 instructions
+instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
+  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "andnl  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
+  match(Set dst (AndI (XorI src1 minus_1) src2));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "andnl  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{
+  match(Set dst (AndI (SubI imm_zero src) src));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "blsil  $dst, $src" %}
+
+  ins_encode %{
+    __ blsil($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, rFlagsReg cr) %{
+  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "blsil  $dst, $src" %}
+
+  ins_encode %{
+    __ blsil($dst$$Register, $src$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
+%{
+  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "blsmskl $dst, $src" %}
+
+  ins_encode %{
+    __ blsmskl($dst$$Register, $src$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
+%{
+  match(Set dst (XorI (AddI src minus_1) src));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "blsmskl $dst, $src" %}
+
+  ins_encode %{
+    __ blsmskl($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
+%{
+  match(Set dst (AndI (AddI src minus_1) src) );
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "blsrl  $dst, $src" %}
+
+  ins_encode %{
+    __ blsrl($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
+%{
+  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "blsrl  $dst, $src" %}
+
+  ins_encode %{
+    __ blsrl($dst$$Register, $src$$Address);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
 // Or Instructions
 // Or Register with Register
 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
@@ -9288,6 +8938,122 @@
   ins_pipe(ialu_mem_imm);
 %}

+// BMI1 instructions
+instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
+  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "andnq  $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
+  match(Set dst (AndL (XorL src1 minus_1) src2));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "andnq  $dst, $src1, $src2" %}
+
+  ins_encode %{
+  __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
+  match(Set dst (AndL (SubL imm_zero src) src));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "blsiq  $dst, $src" %}
+
+  ins_encode %{
+    __ blsiq($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
+  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "blsiq  $dst, $src" %}
+
+  ins_encode %{
+    __ blsiq($dst$$Register, $src$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
+%{
+  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "blsmskq $dst, $src" %}
+
+  ins_encode %{
+    __ blsmskq($dst$$Register, $src$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
+%{
+  match(Set dst (XorL (AddL src minus_1) src));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "blsmskq $dst, $src" %}
+
+  ins_encode %{
+    __ blsmskq($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
+%{
+  match(Set dst (AndL (AddL src minus_1) src) );
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "blsrq  $dst, $src" %}
+
+  ins_encode %{
+    __ blsrq($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
+%{
+  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "blsrq  $dst, $src" %}
+
+  ins_encode %{
+    __ blsrq($dst$$Register, $src$$Address);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
 // Or Instructions
 // Or Register with Register
 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
@@ -10613,6 +10379,174 @@
   ins_pipe( pipe_slow );
 %}

+//----------Overflow Math Instructions-----------------------------------------
+
+instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
+%{
+  match(Set cr (OverflowAddI op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "addl    $op1, $op2\t# overflow check int" %}
+
+  ins_encode %{
+    __ addl($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
+%{
+  match(Set cr (OverflowAddI op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "addl    $op1, $op2\t# overflow check int" %}
+
+  ins_encode %{
+    __ addl($op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
+%{
+  match(Set cr (OverflowAddL op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "addq    $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    __ addq($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
+%{
+  match(Set cr (OverflowAddL op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "addq    $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    __ addq($op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
+%{
+  match(Set cr (OverflowSubI op1 op2));
+
+  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
+  ins_encode %{
+    __ cmpl($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
+%{
+  match(Set cr (OverflowSubI op1 op2));
+
+  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
+  ins_encode %{
+    __ cmpl($op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
+%{
+  match(Set cr (OverflowSubL op1 op2));
+
+  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    __ cmpq($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
+%{
+  match(Set cr (OverflowSubL op1 op2));
+
+  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    __ cmpq($op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowNegI_rReg(rFlagsReg cr, immI0 zero, rax_RegI op2)
+%{
+  match(Set cr (OverflowSubI zero op2));
+  effect(DEF cr, USE_KILL op2);
+
+  format %{ "negl    $op2\t# overflow check int" %}
+  ins_encode %{
+    __ negl($op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
+%{
+  match(Set cr (OverflowSubL zero op2));
+  effect(DEF cr, USE_KILL op2);
+
+  format %{ "negq    $op2\t# overflow check long" %}
+  ins_encode %{
+    __ negq($op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
+%{
+  match(Set cr (OverflowMulI op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "imull    $op1, $op2\t# overflow check int" %}
+  ins_encode %{
+    __ imull($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg_alu0);
+%}
+
+instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
+%{
+  match(Set cr (OverflowMulI op1 op2));
+  effect(DEF cr, TEMP tmp, USE op1, USE op2);
+
+  format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
+  ins_encode %{
+    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg_alu0);
+%}
+
+instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
+%{
+  match(Set cr (OverflowMulL op1 op2));
+  effect(DEF cr, USE_KILL op1, USE op2);
+
+  format %{ "imulq    $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    __ imulq($op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(ialu_reg_reg_alu0);
+%}
+
+instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
+%{
+  match(Set cr (OverflowMulL op1 op2));
+  effect(DEF cr, TEMP tmp, USE op1, USE op2);
+
+  format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
+  %}
+  ins_pipe(ialu_reg_reg_alu0);
+%}
+

 //----------Control Flow Instructions------------------------------------------
 // Signed compare Instructions
@@ -11396,27 +11330,43 @@
 // ============================================================================
 // inlined locking and unlocking

-instruct cmpFastLock(rFlagsReg cr,
-                     rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
-%{
+instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
+  predicate(Compile::current()->use_rtm());
+  match(Set cr (FastLock object box));
+  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
+  ins_cost(300);
+  format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
+  ins_encode %{
+    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
+                 $scr$$Register, $cx1$$Register, $cx2$$Register,
+                 _counters, _rtm_counters, _stack_rtm_counters,
+                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
+                 true, ra_->C->profile_rtm());
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
+  predicate(!Compile::current()->use_rtm());
   match(Set cr (FastLock object box));
   effect(TEMP tmp, TEMP scr, USE_KILL box);
-
   ins_cost(300);
   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
-  ins_encode(Fast_Lock(object, box, tmp, scr));
+  ins_encode %{
+    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
+                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
+  %}
   ins_pipe(pipe_slow);
 %}

-instruct cmpFastUnlock(rFlagsReg cr,
-                       rRegP object, rax_RegP box, rRegP tmp)
-%{
+instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
   match(Set cr (FastUnlock object box));
   effect(TEMP tmp, USE_KILL box);
-
   ins_cost(300);
   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
-  ins_encode(Fast_Unlock(object, box, tmp));
+  ins_encode %{
+    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
+  %}
   ins_pipe(pipe_slow);
 %}
--- a/src/os/bsd/vm/os_bsd.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/os/bsd/vm/os_bsd.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -2590,9 +2590,21 @@
   }
 }

-int os::naked_sleep() {
-  // %% make the sleep time an integer flag. for now use 1 millisec.
-  return os::sleep(Thread::current(), 1, false);
+void os::naked_short_sleep(jlong ms) {
+  struct timespec req;
+
+  assert(ms < 1000, "Un-interruptable sleep, short time use only");
+  req.tv_sec = 0;
+  if (ms > 0) {
+    req.tv_nsec = (ms % 1000) * 1000000;
+  }
+  else {
+    req.tv_nsec = 1;
+  }
+
+  nanosleep(&req, NULL);
+
+  return;
 }

 // Sleep forever; naked call to OS-specific sleep; use with CAUTION
--- a/src/os/linux/vm/os_linux.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/os/linux/vm/os_linux.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -109,6 +109,8 @@

 #define MAX_PATH    (2 * K)

+#define MAX_SECS 100000000
+
 // for timer info max values which include all bits
 #define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)

@@ -2433,7 +2435,6 @@
     sem_t _semaphore;
 };

-
 Semaphore::Semaphore() {
   sem_init(&_semaphore, 0, 0);
 }
@@ -2455,8 +2456,22 @@
 }

 bool Semaphore::timedwait(unsigned int sec, int nsec) {
+
   struct timespec ts;
-  unpackTime(&ts, false, (sec * NANOSECS_PER_SEC) + nsec);
+  // Semaphore's are always associated with CLOCK_REALTIME
+  os::Linux::clock_gettime(CLOCK_REALTIME, &ts);
+  // see unpackTime for discussion on overflow checking
+  if (sec >= MAX_SECS) {
+    ts.tv_sec += MAX_SECS;
+    ts.tv_nsec = 0;
+  } else {
+    ts.tv_sec += sec;
+    ts.tv_nsec += nsec;
+    if (ts.tv_nsec >= NANOSECS_PER_SEC) {
+      ts.tv_nsec -= NANOSECS_PER_SEC;
+      ++ts.tv_sec; // note: this must be <= max_secs
+    }
+  }

   while (1) {
     int result = sem_timedwait(&_semaphore, &ts);
@@ -2961,7 +2976,9 @@

   unsigned char vec[1];
   unsigned imin = 1, imax = pages + 1, imid;
-  int mincore_return_value;
+  int mincore_return_value = 0;
+
+  assert(imin <= imax, "Unexpected page size");

   while (imin < imax) {
     imid = (imax + imin) / 2;
@@ -3833,9 +3850,33 @@
   }
 }

-int os::naked_sleep() {
-  // %% make the sleep time an integer flag. for now use 1 millisec.
-  return os::sleep(Thread::current(), 1, false);
+//
+// Short sleep, direct OS call.
+//
+// Note: certain versions of Linux CFS scheduler (since 2.6.23) do not guarantee
+// sched_yield(2) will actually give up the CPU:
+//
+//   * Alone on this pariticular CPU, keeps running.
+//   * Before the introduction of "skip_buddy" with "compat_yield" disabled
+//     (pre 2.6.39).
+//
+// So calling this with 0 is an alternative.
+//
+void os::naked_short_sleep(jlong ms) {
+  struct timespec req;
+
+  assert(ms < 1000, "Un-interruptable sleep, short time use only");
+  req.tv_sec = 0;
+  if (ms > 0) {
+    req.tv_nsec = (ms % 1000) * 1000000;
+  }
+  else {
+    req.tv_nsec = 1;
+  }
+
+  nanosleep(&req, NULL);
+
+  return;
 }

 // Sleep forever; naked call to OS-specific sleep; use with CAUTION
@@ -5752,7 +5793,6 @@
  * is no need to track notifications.
  */

-#define MAX_SECS 100000000
 /*
  * This code is common to linux and solaris and will be moved to a
  * common place in dolphin.
--- a/src/os/linux/vm/perfMemory_linux.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/os/linux/vm/perfMemory_linux.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -891,8 +891,16 @@
   FREE_C_HEAP_ARRAY(char, filename, mtInternal);

   // open the shared memory file for the give vmid
-  fd = open_sharedmem_file(rfilename, file_flags, CHECK);
-  assert(fd != OS_ERR, "unexpected value");
+  fd = open_sharedmem_file(rfilename, file_flags, THREAD);
+
+  if (fd == OS_ERR) {
+    return;
+  }
+
+  if (HAS_PENDING_EXCEPTION) {
+    ::close(fd);
+    return;
+  }

   if (*sizep == 0) {
     size = sharedmem_filesize(fd, CHECK);
--- a/src/os/solaris/vm/os_solaris.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/os/solaris/vm/os_solaris.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2232,8 +2232,8 @@
         st->cr();
         status = true;
       }
-      ::close(fd);
     }
+    ::close(fd);
   }
   return status;
 }
@@ -2967,7 +2967,7 @@
 char *os::scan_pages(char *start, char* end, page_info* page_expected, page_info* page_found) {
   const uint_t info_types[] = { MEMINFO_VLGRP, MEMINFO_VPAGESIZE };
   const size_t types = sizeof(info_types) / sizeof(info_types[0]);
-  uint64_t addrs[MAX_MEMINFO_CNT], outdata[types * MAX_MEMINFO_CNT];
+  uint64_t addrs[MAX_MEMINFO_CNT], outdata[types * MAX_MEMINFO_CNT + 1];
   uint_t validity[MAX_MEMINFO_CNT];

   size_t page_size = MAX2((size_t)os::vm_page_size(), page_expected->size);
@@ -3006,7 +3006,7 @@
       }
     }

-    if (i != addrs_count) {
+    if (i < addrs_count) {
       if ((validity[i] & 2) != 0) {
         page_found->lgrp_id = outdata[types * i];
       } else {
@@ -3496,9 +3496,14 @@
   return os_sleep(millis, interruptible);
 }

-int os::naked_sleep() {
-  // %% make the sleep time an integer flag. for now use 1 millisec.
-  return os_sleep(1, false);
+void os::naked_short_sleep(jlong ms) {
+  assert(ms < 1000, "Un-interruptable sleep, short time use only");
+
+  // usleep is deprecated and removed from POSIX, in favour of nanosleep, but
+  // Solaris requires -lrt for this.
+  usleep((ms * 1000));
+
+  return;
 }

 // Sleep forever; naked call to OS-specific sleep; use with CAUTION
--- a/src/os/solaris/vm/perfMemory_solaris.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/os/solaris/vm/perfMemory_solaris.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -431,10 +431,12 @@

       RESTARTABLE(::read(fd, addr, remaining), result);
       if (result == OS_ERR) {
+        ::close(fd);
         THROW_MSG_0(vmSymbols::java_io_IOException(), "Read error");
+      } else {
+        remaining-=result;
+        addr+=result;
       }
-      remaining-=result;
-      addr+=result;
     }

     ::close(fd);
@@ -906,8 +908,16 @@
   FREE_C_HEAP_ARRAY(char, filename, mtInternal);

   // open the shared memory file for the give vmid
-  fd = open_sharedmem_file(rfilename, file_flags, CHECK);
-  assert(fd != OS_ERR, "unexpected value");
+  fd = open_sharedmem_file(rfilename, file_flags, THREAD);
+
+  if (fd == OS_ERR) {
+    return;
+  }
+
+  if (HAS_PENDING_EXCEPTION) {
+    ::close(fd);
+    return;
+  }

   if (*sizep == 0) {
     size = sharedmem_filesize(fd, CHECK);
--- a/src/os/windows/vm/os_windows.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/os/windows/vm/os_windows.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -2437,6 +2437,12 @@
     }
   }

+  if ((exception_code == EXCEPTION_ACCESS_VIOLATION) &&
+      VM_Version::is_cpuinfo_segv_addr(pc)) {
+    // Verify that OS save/restore AVX registers.
+    return Handle_Exception(exceptionInfo, VM_Version::cpuinfo_cont_addr());
+  }
+
   if (t != NULL && t->is_Java_thread()) {
     JavaThread* thread = (JavaThread*) t;
     bool in_java = thread->thread_state() == _thread_in_Java;
@@ -3496,6 +3502,16 @@
   return result;
 }

+//
+// Short sleep, direct OS call.
+//
+// ms = 0, means allow others (if any) to run.
+//
+void os::naked_short_sleep(jlong ms) {
+  assert(ms < 1000, "Un-interruptable sleep, short time use only");
+  Sleep(ms);
+}
+
 // Sleep forever; naked call to OS-specific sleep; use with CAUTION
 void os::infinite_sleep() {
   while (true) {    // sleep forever ...
@@ -3623,13 +3639,14 @@
          "possibility of dangling Thread pointer");

   OSThread* osthread = thread->osthread();
-  bool interrupted = osthread->interrupted();
   // There is no synchronization between the setting of the interrupt
   // and it being cleared here. It is critical - see 6535709 - that
   // we only clear the interrupt state, and reset the interrupt event,
   // if we are going to report that we were indeed interrupted - else
   // an interrupt can be "lost", leading to spurious wakeups or lost wakeups
-  // depending on the timing
+  // depending on the timing. By checking thread interrupt event to see
+  // if the thread gets real interrupt thus prevent spurious wakeup.
+  bool interrupted = osthread->interrupted() && (WaitForSingleObject(osthread->interrupt_event(), 0) == WAIT_OBJECT_0);
   if (interrupted && clear_interrupted) {
     osthread->set_interrupted(false);
     ResetEvent(osthread->interrupt_event());
--- a/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -492,6 +492,11 @@
       }
     }

+    if ((sig == SIGSEGV || sig == SIGBUS) && VM_Version::is_cpuinfo_segv_addr(pc)) {
+      // Verify that OS save/restore AVX registers.
+      stub = VM_Version::cpuinfo_cont_addr();
+    }
+
     // We test if stub is already set (by the stack overflow code
     // above) so it is not overwritten by the code that follows. This
     // check is not required on other platforms, because on other
--- a/src/os_cpu/linux_x86/vm/os_linux_x86.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/os_cpu/linux_x86/vm/os_linux_x86.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -337,6 +337,11 @@
       }
     }

+    if ((sig == SIGSEGV) && VM_Version::is_cpuinfo_segv_addr(pc)) {
+      // Verify that OS save/restore AVX registers.
+      stub = VM_Version::cpuinfo_cont_addr();
+    }
+
     if (thread->thread_state() == _thread_in_Java) {
       // Java thread running in Java code => find exception handler if any
       // a fault inside compiled code, the interpreter, or a stub
--- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -459,6 +459,11 @@
       }
     }

+    if ((sig == SIGSEGV) && VM_Version::is_cpuinfo_segv_addr(pc)) {
+      // Verify that OS save/restore AVX registers.
+      stub = VM_Version::cpuinfo_cont_addr();
+    }
+
     if (thread->thread_state() == _thread_in_vm) {
       if (sig == SIGBUS && info->si_code == BUS_OBJERR && thread->doing_unsafe_access()) {
         stub = StubRoutines::handler_for_unsafe_access();
@@ -475,9 +480,11 @@
         // here if the underlying file has been truncated.
         // Do not crash the VM in such a case.
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
-        nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL;
-        if (nm != NULL && nm->has_unsafe_access()) {
-          stub = StubRoutines::handler_for_unsafe_access();
+        if (cb != NULL) {
+          nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL;
+          if (nm != NULL && nm->has_unsafe_access()) {
+            stub = StubRoutines::handler_for_unsafe_access();
+          }
         }
       }
       else
@@ -724,6 +731,7 @@
   err.report_and_die();

   ShouldNotReachHere();
+  return false;
 }

 void os::print_context(outputStream *st, void *context) {
--- a/src/share/vm/adlc/archDesc.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/adlc/archDesc.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1193,15 +1193,12 @@
          || strcmp(idealName,"CmpF") == 0
          || strcmp(idealName,"FastLock") == 0
          || strcmp(idealName,"FastUnlock") == 0
-         || strcmp(idealName,"AddExactI") == 0
-         || strcmp(idealName,"AddExactL") == 0
-         || strcmp(idealName,"SubExactI") == 0
-         || strcmp(idealName,"SubExactL") == 0
-         || strcmp(idealName,"MulExactI") == 0
-         || strcmp(idealName,"MulExactL") == 0
-         || strcmp(idealName,"NegExactI") == 0
-         || strcmp(idealName,"NegExactL") == 0
-         || strcmp(idealName,"FlagsProj") == 0
+         || strcmp(idealName,"OverflowAddI") == 0
+         || strcmp(idealName,"OverflowAddL") == 0
+         || strcmp(idealName,"OverflowSubI") == 0
+         || strcmp(idealName,"OverflowSubL") == 0
+         || strcmp(idealName,"OverflowMulI") == 0
+         || strcmp(idealName,"OverflowMulL") == 0
          || strcmp(idealName,"Bool") == 0
          || strcmp(idealName,"Binary") == 0 ) {
       // Removed ConI from the must_clone list.  CPUs that cannot use
--- a/src/share/vm/adlc/formssel.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/adlc/formssel.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -660,6 +660,7 @@
   int USE_of_memory  = 0;
   int DEF_of_memory  = 0;
   const char*    last_memory_DEF = NULL; // to test DEF/USE pairing in asserts
+  const char*    last_memory_USE = NULL;
   Component     *unique          = NULL;
   Component     *comp            = NULL;
   ComponentList &components      = (ComponentList &)_components;
@@ -681,7 +682,16 @@
           assert(0 == strcmp(last_memory_DEF, comp->_name), "every memory DEF is followed by a USE of the same name");
           last_memory_DEF = NULL;
         }
-        USE_of_memory++;
+        // Handles same memory being used multiple times in the case of BMI1 instructions.
+        if (last_memory_USE != NULL) {
+          if (strcmp(comp->_name, last_memory_USE) != 0) {
+            USE_of_memory++;
+          }
+        } else {
+          USE_of_memory++;
+        }
+        last_memory_USE = comp->_name;
+
         if (DEF_of_memory == 0)  // defs take precedence
           unique = comp;
       } else {
--- a/src/share/vm/adlc/output_c.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/adlc/output_c.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1599,6 +1599,8 @@

       if( node->is_ideal_fastlock() && new_inst->is_ideal_fastlock() ) {
         fprintf(fp, "  ((MachFastLockNode*)n%d)->_counters = _counters;\n",cnt);
+        fprintf(fp, "  ((MachFastLockNode*)n%d)->_rtm_counters = _rtm_counters;\n",cnt);
+        fprintf(fp, "  ((MachFastLockNode*)n%d)->_stack_rtm_counters = _stack_rtm_counters;\n",cnt);
       }

       // Fill in the bottom_type where requested
@@ -3980,6 +3982,8 @@
   }
   if( inst->is_ideal_fastlock() ) {
     fprintf(fp_cpp, "%s node->_counters = _leaf->as_FastLock()->counters();\n", indent);
+    fprintf(fp_cpp, "%s node->_rtm_counters = _leaf->as_FastLock()->rtm_counters();\n", indent);
+    fprintf(fp_cpp, "%s node->_stack_rtm_counters = _leaf->as_FastLock()->stack_rtm_counters();\n", indent);
   }

 }
--- a/src/share/vm/ci/ciClassList.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/ci/ciClassList.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -103,6 +103,7 @@
 friend class ciMethodType;             \
 friend class ciReceiverTypeData;       \
 friend class ciTypeEntries;            \
+friend class ciSpeculativeTrapData;    \
 friend class ciSymbol;                 \
 friend class ciArray;                  \
 friend class ciObjArray;               \
--- a/src/share/vm/ci/ciEnv.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/ci/ciEnv.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -926,7 +926,8 @@
                             AbstractCompiler* compiler,
                             int comp_level,
                             bool has_unsafe_access,
-                            bool has_wide_vectors) {
+                            bool has_wide_vectors,
+                            RTMState  rtm_state) {
   VM_ENTRY_MARK;
   nmethod* nm = NULL;
   {
@@ -973,6 +974,15 @@

     methodHandle method(THREAD, target->get_Method());

+#if INCLUDE_RTM_OPT
+    if (!failing() && (rtm_state != NoRTM) &&
+        (method()->method_data() != NULL) &&
+        (method()->method_data()->rtm_state() != rtm_state)) {
+      // Preemptive decompile if rtm state was changed.
+      record_failure("RTM state change invalidated rtm code");
+    }
+#endif
+
     if (failing()) {
       // While not a true deoptimization, it is a preemptive decompile.
       MethodData* mdo = method()->method_data();
@@ -999,13 +1009,15 @@
                                frame_words, oop_map_set,
                                handler_table, inc_table,
                                compiler, comp_level);
-
     // Free codeBlobs
     code_buffer->free_blob();

     if (nm != NULL) {
       nm->set_has_unsafe_access(has_unsafe_access);
       nm->set_has_wide_vectors(has_wide_vectors);
+#if INCLUDE_RTM_OPT
+      nm->set_rtm_state(rtm_state);
+#endif

       // Record successful registration.
       // (Put nm into the task handle *before* publishing to the Java heap.)
--- a/src/share/vm/ci/ciEnv.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/ci/ciEnv.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -363,7 +363,8 @@
                        AbstractCompiler*         compiler,
                        int                       comp_level,
                        bool                      has_unsafe_access,
-                       bool                      has_wide_vectors);
+                       bool                      has_wide_vectors,
+                       RTMState                  rtm_state = NoRTM);


   // Access to certain well known ciObjects.
--- a/src/share/vm/ci/ciMethodData.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/ci/ciMethodData.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -78,6 +78,36 @@
   _parameters = NULL;
 }

+void ciMethodData::load_extra_data() {
+  MethodData* mdo = get_MethodData();
+
+  // speculative trap entries also hold a pointer to a Method so need to be translated
+  DataLayout* dp_src  = mdo->extra_data_base();
+  DataLayout* end_src = mdo->extra_data_limit();
+  DataLayout* dp_dst  = extra_data_base();
+  for (;; dp_src = MethodData::next_extra(dp_src), dp_dst = MethodData::next_extra(dp_dst)) {
+    assert(dp_src < end_src, "moved past end of extra data");
+    // New traps in the MDO can be added as we translate the copy so
+    // look at the entries in the copy.
+    switch(dp_dst->tag()) {
+    case DataLayout::speculative_trap_data_tag: {
+      ciSpeculativeTrapData* data_dst = new ciSpeculativeTrapData(dp_dst);
+      SpeculativeTrapData* data_src = new SpeculativeTrapData(dp_src);
+      data_dst->translate_from(data_src);
+      break;
+    }
+    case DataLayout::bit_data_tag:
+      break;
+    case DataLayout::no_tag:
+    case DataLayout::arg_info_data_tag:
+      // An empty slot or ArgInfoData entry marks the end of the trap data
+      return;
+    default:
+      fatal(err_msg("bad tag = %d", dp_dst->tag()));
+    }
+  }
+}
+
 void ciMethodData::load_data() {
   MethodData* mdo = get_MethodData();
   if (mdo == NULL) {
@@ -116,6 +146,8 @@
     parameters->translate_from(mdo->parameters_type_data());
   }

+  load_extra_data();
+
   // Note:  Extra data are all BitData, and do not need translation.
   _current_mileage = MethodData::mileage_of(mdo->method());
   _invocation_counter = mdo->invocation_count();
@@ -156,6 +188,12 @@
   set_type(translate_klass(k));
 }

+void ciSpeculativeTrapData::translate_from(const ProfileData* data) {
+  Method* m = data->as_SpeculativeTrapData()->method();
+  ciMethod* ci_m = CURRENT_ENV->get_method(m);
+  set_method(ci_m);
+}
+
 // Get the data at an arbitrary (sort of) data index.
 ciProfileData* ciMethodData::data_at(int data_index) {
   if (out_of_bounds(data_index)) {
@@ -203,32 +241,64 @@
   return next;
 }

-// Translate a bci to its corresponding data, or NULL.
-ciProfileData* ciMethodData::bci_to_data(int bci) {
-  ciProfileData* data = data_before(bci);
-  for ( ; is_valid(data); data = next_data(data)) {
-    if (data->bci() == bci) {
-      set_hint_di(dp_to_di(data->dp()));
-      return data;
-    } else if (data->bci() > bci) {
-      break;
-    }
-  }
+ciProfileData* ciMethodData::bci_to_extra_data(int bci, ciMethod* m, bool& two_free_slots) {
   // bci_to_extra_data(bci) ...
   DataLayout* dp  = data_layout_at(data_size());
   DataLayout* end = data_layout_at(data_size() + extra_data_size());
-  for (; dp < end; dp = MethodData::next_extra(dp)) {
-    if (dp->tag() == DataLayout::no_tag) {
+  two_free_slots = false;
+  for (;dp < end; dp = MethodData::next_extra(dp)) {
+    switch(dp->tag()) {
+    case DataLayout::no_tag:
       _saw_free_extra_data = true;  // observed an empty slot (common case)
+      two_free_slots = (MethodData::next_extra(dp)->tag() == DataLayout::no_tag);
       return NULL;
+    case DataLayout::arg_info_data_tag:
+      return NULL; // ArgInfoData is at the end of extra data section.
+    case DataLayout::bit_data_tag:
+      if (m == NULL && dp->bci() == bci) {
+        return new ciBitData(dp);
+      }
+      break;
+    case DataLayout::speculative_trap_data_tag: {
+      ciSpeculativeTrapData* data = new ciSpeculativeTrapData(dp);
+      // data->method() might be null if the MDO is snapshotted
+      // concurrently with a trap
+      if (m != NULL && data->method() == m && dp->bci() == bci) {
+        return data;
+      }
+      break;
+    }
+    default:
+      fatal(err_msg("bad tag = %d", dp->tag()));
     }
-    if (dp->tag() == DataLayout::arg_info_data_tag) {
-      break; // ArgInfoData is at the end of extra data section.
+  }
+  return NULL;
+}
+
+// Translate a bci to its corresponding data, or NULL.
+ciProfileData* ciMethodData::bci_to_data(int bci, ciMethod* m) {
+  // If m is not NULL we look for a SpeculativeTrapData entry
+  if (m == NULL) {
+    ciProfileData* data = data_before(bci);
+    for ( ; is_valid(data); data = next_data(data)) {
+      if (data->bci() == bci) {
+        set_hint_di(dp_to_di(data->dp()));
+        return data;
+      } else if (data->bci() > bci) {
+        break;
+      }
     }
-    if (dp->bci() == bci) {
-      assert(dp->tag() == DataLayout::bit_data_tag, "sane");
-      return new ciBitData(dp);
-    }
+  }
+  bool two_free_slots = false;
+  ciProfileData* result = bci_to_extra_data(bci, m, two_free_slots);
+  if (result != NULL) {
+    return result;
+  }
+  if (m != NULL && !two_free_slots) {
+    // We were looking for a SpeculativeTrapData entry we didn't
+    // find. Room is not available for more SpeculativeTrapData
+    // entries, look in the non SpeculativeTrapData entries.
+    return bci_to_data(bci, NULL);
   }
   return NULL;
 }
@@ -525,18 +595,25 @@
   st->print_cr("--- Extra data:");
   DataLayout* dp  = data_layout_at(data_size());
   DataLayout* end = data_layout_at(data_size() + extra_data_size());
-  for (; dp < end; dp = MethodData::next_extra(dp)) {
-    if (dp->tag() == DataLayout::no_tag)  continue;
-    if (dp->tag() == DataLayout::bit_data_tag) {
+  for (;; dp = MethodData::next_extra(dp)) {
+    assert(dp < end, "moved past end of extra data");
+    switch (dp->tag()) {
+    case DataLayout::no_tag:
+      continue;
+    case DataLayout::bit_data_tag:
       data = new BitData(dp);
-    } else {
-      assert(dp->tag() == DataLayout::arg_info_data_tag, "must be BitData or ArgInfo");
+      break;
+    case DataLayout::arg_info_data_tag:
       data = new ciArgInfoData(dp);
       dp = end; // ArgInfoData is at the end of extra data section.
+      break;
+    default:
+      fatal(err_msg("unexpected tag %d", dp->tag()));
     }
     st->print("%d", dp_to_di(data->dp()));
     st->fill_to(6);
     data->print_data_on(st);
+    if (dp >= end) return;
   }
 }

@@ -569,8 +646,8 @@
   st->cr();
 }

-void ciCallTypeData::print_data_on(outputStream* st) const {
-  print_shared(st, "ciCallTypeData");
+void ciCallTypeData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "ciCallTypeData", extra);
   if (has_arguments()) {
     tab(st, true);
     st->print("argument types");
@@ -599,18 +676,18 @@
   }
 }

-void ciReceiverTypeData::print_data_on(outputStream* st) const {
-  print_shared(st, "ciReceiverTypeData");
+void ciReceiverTypeData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "ciReceiverTypeData", extra);
   print_receiver_data_on(st);
 }

-void ciVirtualCallData::print_data_on(outputStream* st) const {
-  print_shared(st, "ciVirtualCallData");
+void ciVirtualCallData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "ciVirtualCallData", extra);
   rtd_super()->print_receiver_data_on(st);
 }

-void ciVirtualCallTypeData::print_data_on(outputStream* st) const {
-  print_shared(st, "ciVirtualCallTypeData");
+void ciVirtualCallTypeData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "ciVirtualCallTypeData", extra);
   rtd_super()->print_receiver_data_on(st);
   if (has_arguments()) {
     tab(st, true);
@@ -624,8 +701,15 @@
   }
 }

-void ciParametersTypeData::print_data_on(outputStream* st) const {
-  st->print_cr("Parametertypes");
+void ciParametersTypeData::print_data_on(outputStream* st, const char* extra) const {
+  st->print_cr("ciParametersTypeData");
   parameters()->print_data_on(st);
 }
+
+void ciSpeculativeTrapData::print_data_on(outputStream* st, const char* extra) const {
+  st->print_cr("ciSpeculativeTrapData");
+  tab(st);
+  method()->print_short_name(st);
+  st->cr();
+}
 #endif
--- a/src/share/vm/ci/ciMethodData.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/ci/ciMethodData.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -31,6 +31,7 @@
 #include "ci/ciUtilities.hpp"
 #include "oops/methodData.hpp"
 #include "oops/oop.inline.hpp"
+#include "runtime/deoptimization.hpp"

 class ciBitData;
 class ciCounterData;
@@ -44,6 +45,7 @@
 class ciCallTypeData;
 class ciVirtualCallTypeData;
 class ciParametersTypeData;
+class ciSpeculativeTrapData;;

 typedef ProfileData ciProfileData;

@@ -173,7 +175,7 @@
   }

 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra) const;
 #endif
 };

@@ -200,7 +202,7 @@
   }
   void translate_receiver_data_from(const ProfileData* data);
 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra) const;
   void print_receiver_data_on(outputStream* st) const;
 #endif
 };
@@ -225,7 +227,7 @@
     rtd_super()->translate_receiver_data_from(data);
   }
 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra) const;
 #endif
 };

@@ -287,7 +289,7 @@
   }

 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra) const;
 #endif
 };

@@ -336,7 +338,26 @@
   }

 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra) const;
+#endif
+};
+
+class ciSpeculativeTrapData : public SpeculativeTrapData {
+public:
+  ciSpeculativeTrapData(DataLayout* layout) : SpeculativeTrapData(layout) {}
+
+  virtual void translate_from(const ProfileData* data);
+
+  ciMethod* method() const {
+    return (ciMethod*)intptr_at(method_offset);
+  }
+
+  void set_method(ciMethod* m) {
+    set_intptr_at(method_offset, (intptr_t)m);
+  }
+
+#ifndef PRODUCT
+  void print_data_on(outputStream* st, const char* extra) const;
 #endif
 };

@@ -436,6 +457,16 @@

   ciArgInfoData *arg_info() const;

+  address data_base() const {
+    return (address) _data;
+  }
+  DataLayout* limit_data_position() const {
+    return (DataLayout*)((address)data_base() + _data_size);
+  }
+
+  void load_extra_data();
+  ciProfileData* bci_to_extra_data(int bci, ciMethod* m, bool& two_free_slots);
+
 public:
   bool is_method_data() const { return true; }

@@ -447,6 +478,18 @@

   int invocation_count() { return _invocation_counter; }
   int backedge_count()   { return _backedge_counter;   }
+
+#if INCLUDE_RTM_OPT
+  // return cached value
+  int rtm_state() {
+    if (is_empty()) {
+      return NoRTM;
+    } else {
+      return get_MethodData()->rtm_state();
+    }
+  }
+#endif
+
   // Transfer information about the method to MethodData*.
   // would_profile means we would like to profile this method,
   // meaning it's not trivial.
@@ -475,9 +518,11 @@
   ciProfileData* next_data(ciProfileData* current);
   bool is_valid(ciProfileData* current) { return current != NULL; }

-  // Get the data at an arbitrary bci, or NULL if there is none.
-  ciProfileData* bci_to_data(int bci);
-  ciProfileData* bci_to_extra_data(int bci, bool create_if_missing);
+  DataLayout* extra_data_base() const { return limit_data_position(); }
+
+  // Get the data at an arbitrary bci, or NULL if there is none. If m
+  // is not NULL look for a SpeculativeTrapData if any first.
+  ciProfileData* bci_to_data(int bci, ciMethod* m = NULL);

   uint overflow_trap_count() const {
     return _orig.overflow_trap_count();
@@ -496,12 +541,13 @@

   // Helpful query functions that decode trap_state.
   int has_trap_at(ciProfileData* data, int reason);
-  int has_trap_at(int bci, int reason) {
-    return has_trap_at(bci_to_data(bci), reason);
+  int has_trap_at(int bci, ciMethod* m, int reason) {
+    assert((m != NULL) == Deoptimization::reason_is_speculate(reason), "inconsistent method/reason");
+    return has_trap_at(bci_to_data(bci, m), reason);
   }
   int trap_recompiled_at(ciProfileData* data);
-  int trap_recompiled_at(int bci) {
-    return trap_recompiled_at(bci_to_data(bci));
+  int trap_recompiled_at(int bci, ciMethod* m) {
+    return trap_recompiled_at(bci_to_data(bci, m));
   }

   void clear_escape_info();
--- a/src/share/vm/classfile/altHashing.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/classfile/altHashing.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,18 +39,18 @@
 }

 // Seed value used for each alternative hash calculated.
-jint AltHashing::compute_seed() {
+juint AltHashing::compute_seed() {
   jlong nanos = os::javaTimeNanos();
   jlong now = os::javaTimeMillis();
-  jint SEED_MATERIAL[8] = {
-            (jint) object_hash(SystemDictionary::String_klass()),
-            (jint) object_hash(SystemDictionary::System_klass()),
-            (jint) os::random(),  // current thread isn't a java thread
-            (jint) (((julong)nanos) >> 32),
-            (jint) nanos,
-            (jint) (((julong)now) >> 32),
-            (jint) now,
-            (jint) (os::javaTimeNanos() >> 2)
+  int SEED_MATERIAL[8] = {
+            (int) object_hash(SystemDictionary::String_klass()),
+            (int) object_hash(SystemDictionary::System_klass()),
+            (int) os::random(),  // current thread isn't a java thread
+            (int) (((julong)nanos) >> 32),
+            (int) nanos,
+            (int) (((julong)now) >> 32),
+            (int) now,
+            (int) (os::javaTimeNanos() >> 2)
   };

   return murmur3_32(SEED_MATERIAL, 8);
@@ -58,14 +58,14 @@


 // Murmur3 hashing for Symbol
-jint AltHashing::murmur3_32(jint seed, const jbyte* data, int len) {
-  jint h1 = seed;
+juint AltHashing::murmur3_32(juint seed, const jbyte* data, int len) {
+  juint h1 = seed;
   int count = len;
   int offset = 0;

   // body
   while (count >= 4) {
-    jint k1 = (data[offset] & 0x0FF)
+    juint k1 = (data[offset] & 0x0FF)
         | (data[offset + 1] & 0x0FF) << 8
         | (data[offset + 2] & 0x0FF) << 16
         | data[offset + 3] << 24;
@@ -85,7 +85,7 @@
   // tail

   if (count > 0) {
-    jint k1 = 0;
+    juint k1 = 0;

     switch (count) {
       case 3:
@@ -109,18 +109,18 @@
   h1 ^= len;

   // finalization mix force all bits of a hash block to avalanche
-  h1 ^= ((unsigned int)h1) >> 16;
+  h1 ^= h1 >> 16;
   h1 *= 0x85ebca6b;
-  h1 ^= ((unsigned int)h1) >> 13;
+  h1 ^= h1 >> 13;
   h1 *= 0xc2b2ae35;
-  h1 ^= ((unsigned int)h1) >> 16;
+  h1 ^= h1 >> 16;

   return h1;
 }

 // Murmur3 hashing for Strings
-jint AltHashing::murmur3_32(jint seed, const jchar* data, int len) {
-  jint h1 = seed;
+juint AltHashing::murmur3_32(juint seed, const jchar* data, int len) {
+  juint h1 = seed;

   int off = 0;
   int count = len;
@@ -129,7 +129,7 @@
   while (count >= 2) {
     jchar d1 = data[off++] & 0xFFFF;
     jchar d2 = data[off++];
-    jint k1 = (d1 | d2 << 16);
+    juint k1 = (d1 | d2 << 16);

     count -= 2;

@@ -145,7 +145,7 @@
   // tail

   if (count > 0) {
-    int k1 = data[off];
+    juint k1 = (juint)data[off];

     k1 *= 0xcc9e2d51;
     k1 = Integer_rotateLeft(k1, 15);
@@ -157,25 +157,25 @@
   h1 ^= len * 2; // (Character.SIZE / Byte.SIZE);

   // finalization mix force all bits of a hash block to avalanche
-  h1 ^= ((unsigned int)h1) >> 16;
+  h1 ^= h1 >> 16;
   h1 *= 0x85ebca6b;
-  h1 ^= ((unsigned int)h1) >> 13;
+  h1 ^= h1 >> 13;
   h1 *= 0xc2b2ae35;
-  h1 ^= ((unsigned int)h1) >> 16;
+  h1 ^= h1 >> 16;

   return h1;
 }

 // Hash used for the seed.
-jint AltHashing::murmur3_32(jint seed, const int* data, int len) {
-  jint h1 = seed;
+juint AltHashing::murmur3_32(juint seed, const int* data, int len) {
+  juint h1 = seed;

   int off = 0;
   int end = len;

   // body
   while (off < end) {
-    jint k1 = data[off++];
+    juint k1 = (juint)data[off++];

     k1 *= 0xcc9e2d51;
     k1 = Integer_rotateLeft(k1, 15);
@@ -193,26 +193,26 @@
   h1 ^= len * 4; // (Integer.SIZE / Byte.SIZE);

   // finalization mix force all bits of a hash block to avalanche
-  h1 ^= ((juint)h1) >> 16;
+  h1 ^= h1 >> 16;
   h1 *= 0x85ebca6b;
-  h1 ^= ((juint)h1) >> 13;
+  h1 ^= h1 >> 13;
   h1 *= 0xc2b2ae35;
-  h1 ^= ((juint)h1) >> 16;
+  h1 ^= h1 >> 16;

   return h1;
 }

-jint AltHashing::murmur3_32(const int* data, int len) {
+juint AltHashing::murmur3_32(const int* data, int len) {
   return murmur3_32(0, data, len);
 }

 #ifndef PRODUCT
 // Overloaded versions for internal test.
-jint AltHashing::murmur3_32(const jbyte* data, int len) {
+juint AltHashing::murmur3_32(const jbyte* data, int len) {
   return murmur3_32(0, data, len);
 }

-jint AltHashing::murmur3_32(const jchar* data, int len) {
+juint AltHashing::murmur3_32(const jchar* data, int len) {
   return murmur3_32(0, data, len);
 }

@@ -251,11 +251,11 @@

   // Hash subranges {}, {0}, {0,1}, {0,1,2}, ..., {0,...,255}
   for (int i = 0; i < 256; i++) {
-    jint hash = murmur3_32(256 - i, vector, i);
+    juint hash = murmur3_32(256 - i, vector, i);
     hashes[i * 4] = (jbyte) hash;
-    hashes[i * 4 + 1] = (jbyte) (((juint)hash) >> 8);
-    hashes[i * 4 + 2] = (jbyte) (((juint)hash) >> 16);
-    hashes[i * 4 + 3] = (jbyte) (((juint)hash) >> 24);
+    hashes[i * 4 + 1] = (jbyte)(hash >> 8);
+    hashes[i * 4 + 2] = (jbyte)(hash >> 16);
+    hashes[i * 4 + 3] = (jbyte)(hash >> 24);
   }

   // hash to get const result.
@@ -269,7 +269,7 @@
 }

 void AltHashing::testEquivalentHashes() {
-  jint jbytes, jchars, ints;
+  juint jbytes, jchars, ints;

   // printf("testEquivalentHashes\n");
--- a/src/share/vm/classfile/altHashing.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/classfile/altHashing.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,24 +39,24 @@
 class AltHashing : AllStatic {

   // utility function copied from java/lang/Integer
-  static jint Integer_rotateLeft(jint i, int distance) {
-    return (i << distance) | (((juint)i) >> (32-distance));
+  static juint Integer_rotateLeft(juint i, int distance) {
+    return (i << distance) | (i >> (32-distance));
   }
-  static jint murmur3_32(const int* data, int len);
-  static jint murmur3_32(jint seed, const int* data, int len);
+  static juint murmur3_32(const int* data, int len);
+  static juint murmur3_32(juint seed, const int* data, int len);

 #ifndef PRODUCT
   // Hashing functions used for internal testing
-  static jint murmur3_32(const jbyte* data, int len);
-  static jint murmur3_32(const jchar* data, int len);
+  static juint murmur3_32(const jbyte* data, int len);
+  static juint murmur3_32(const jchar* data, int len);
   static void testMurmur3_32_ByteArray();
   static void testEquivalentHashes();
 #endif // PRODUCT

  public:
-  static jint compute_seed();
-  static jint murmur3_32(jint seed, const jbyte* data, int len);
-  static jint murmur3_32(jint seed, const jchar* data, int len);
+  static juint compute_seed();
+  static juint murmur3_32(juint seed, const jbyte* data, int len);
+  static juint murmur3_32(juint seed, const jchar* data, int len);
   NOT_PRODUCT(static void test_alt_hash();)
 };
 #endif // SHARE_VM_CLASSFILE_ALTHASHING_HPP
--- a/src/share/vm/classfile/javaClasses.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/classfile/javaClasses.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -61,10 +61,6 @@

   static Handle basic_create(int length, TRAPS);

-  static void set_value( oop string, typeArrayOop buffer) {
-    assert(initialized, "Must be initialized");
-    string->obj_field_put(value_offset,  (oop)buffer);
-  }
   static void set_offset(oop string, int offset) {
     assert(initialized, "Must be initialized");
     if (offset_offset > 0) {
@@ -122,12 +118,26 @@
     return hash_offset;
   }

+  static void set_value(oop string, typeArrayOop buffer) {
+    assert(initialized && (value_offset > 0), "Must be initialized");
+    string->obj_field_put(value_offset, (oop)buffer);
+  }
+  static void set_hash(oop string, unsigned int hash) {
+    assert(initialized && (hash_offset > 0), "Must be initialized");
+    string->int_field_put(hash_offset, hash);
+  }
+
   // Accessors
   static typeArrayOop value(oop java_string) {
     assert(initialized && (value_offset > 0), "Must be initialized");
     assert(is_instance(java_string), "must be java_string");
     return (typeArrayOop) java_string->obj_field(value_offset);
   }
+  static unsigned int hash(oop java_string) {
+    assert(initialized && (hash_offset > 0), "Must be initialized");
+    assert(is_instance(java_string), "must be java_string");
+    return java_string->int_field(hash_offset);
+  }
   static int offset(oop java_string) {
     assert(initialized, "Must be initialized");
     assert(is_instance(java_string), "must be java_string");
--- a/src/share/vm/classfile/symbolTable.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/classfile/symbolTable.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,6 +35,9 @@
 #include "oops/oop.inline2.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "utilities/hashtable.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1StringDedup.hpp"
+#endif

 // --------------------------------------------------------------------------

@@ -728,6 +731,15 @@
     string = java_lang_String::create_from_unicode(name, len, CHECK_NULL);
   }

+#if INCLUDE_ALL_GCS
+  if (G1StringDedup::is_enabled()) {
+    // Deduplicate the string before it is interned. Note that we should never
+    // deduplicate a string after it has been interned. Doing so will counteract
+    // compiler optimizations done on e.g. interned string literals.
+    G1StringDedup::deduplicate(string());
+  }
+#endif
+
   // Grab the StringTable_lock before getting the_table() because it could
   // change at safepoint.
   MutexLocker ml(StringTable_lock, THREAD);
--- a/src/share/vm/code/nmethod.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/code/nmethod.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -481,7 +481,9 @@
   _scavenge_root_link      = NULL;
   _scavenge_root_state     = 0;
   _compiler                = NULL;
-
+#if INCLUDE_RTM_OPT
+  _rtm_state               = NoRTM;
+#endif
 #ifdef HAVE_DTRACE_H
   _trap_offset             = 0;
 #endif // def HAVE_DTRACE_H
--- a/src/share/vm/code/nmethod.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/code/nmethod.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -193,6 +193,12 @@

   jbyte _scavenge_root_state;

+#if INCLUDE_RTM_OPT
+  // RTM state at compile time. Used during deoptimization to decide
+  // whether to restart collecting RTM locking abort statistic again.
+  RTMState _rtm_state;
+#endif
+
   // Nmethod Flushing lock. If non-zero, then the nmethod is not removed
   // and is not made into a zombie. However, once the nmethod is made into
   // a zombie, it will be locked one final time if CompiledMethodUnload
@@ -414,6 +420,12 @@
   bool  is_zombie() const                         { return _state == zombie; }
   bool  is_unloaded() const                       { return _state == unloaded;   }

+#if INCLUDE_RTM_OPT
+  // rtm state accessing and manipulating
+  RTMState  rtm_state() const                     { return _rtm_state; }
+  void set_rtm_state(RTMState state)              { _rtm_state = state; }
+#endif
+
   // Make the nmethod non entrant. The nmethod will continue to be
   // alive.  It is used when an uncommon trap happens.  Returns true
   // if this thread changed the state of the nmethod or false if
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -2496,7 +2496,8 @@
 }

 void CMSCollector::report_heap_summary(GCWhen::Type when) {
-  _gc_tracer_cm->report_gc_heap_summary(when, _last_heap_summary, _last_metaspace_summary);
+  _gc_tracer_cm->report_gc_heap_summary(when, _last_heap_summary);
+  _gc_tracer_cm->report_metaspace_summary(when, _last_metaspace_summary);
 }

 void CMSCollector::collect_in_foreground(bool clear_all_soft_refs, GCCause::Cause cause) {
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1809,8 +1809,8 @@
   uint _regions_claimed;
   size_t _freed_bytes;
   FreeRegionList* _local_cleanup_list;
-  OldRegionSet* _old_proxy_set;
-  HumongousRegionSet* _humongous_proxy_set;
+  HeapRegionSetCount _old_regions_removed;
+  HeapRegionSetCount _humongous_regions_removed;
   HRRSCleanupTask* _hrrs_cleanup_task;
   double _claimed_region_time;
   double _max_region_time;
@@ -1819,19 +1819,19 @@
   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                              int worker_num,
                              FreeRegionList* local_cleanup_list,
-                             OldRegionSet* old_proxy_set,
-                             HumongousRegionSet* humongous_proxy_set,
                              HRRSCleanupTask* hrrs_cleanup_task) :
     _g1(g1), _worker_num(worker_num),
     _max_live_bytes(0), _regions_claimed(0),
     _freed_bytes(0),
     _claimed_region_time(0.0), _max_region_time(0.0),
     _local_cleanup_list(local_cleanup_list),
-    _old_proxy_set(old_proxy_set),
-    _humongous_proxy_set(humongous_proxy_set),
+    _old_regions_removed(),
+    _humongous_regions_removed(),
     _hrrs_cleanup_task(hrrs_cleanup_task) { }

   size_t freed_bytes() { return _freed_bytes; }
+  const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
+  const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }

   bool doHeapRegion(HeapRegion *hr) {
     if (hr->continuesHumongous()) {
@@ -1844,13 +1844,22 @@
     _regions_claimed++;
     hr->note_end_of_marking();
     _max_live_bytes += hr->max_live_bytes();
-    _g1->free_region_if_empty(hr,
-                              &_freed_bytes,
-                              _local_cleanup_list,
-                              _old_proxy_set,
-                              _humongous_proxy_set,
-                              _hrrs_cleanup_task,
-                              true /* par */);
+
+    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
+      _freed_bytes += hr->used();
+      hr->set_containing_set(NULL);
+      if (hr->isHumongous()) {
+        assert(hr->startsHumongous(), "we should only see starts humongous");
+        _humongous_regions_removed.increment(1u, hr->capacity());
+        _g1->free_humongous_region(hr, _local_cleanup_list, true);
+      } else {
+        _old_regions_removed.increment(1u, hr->capacity());
+        _g1->free_region(hr, _local_cleanup_list, true);
+      }
+    } else {
+      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
+    }
+
     double region_time = (os::elapsedTime() - start);
     _claimed_region_time += region_time;
     if (region_time > _max_region_time) {
@@ -1883,12 +1892,8 @@
   void work(uint worker_id) {
     double start = os::elapsedTime();
     FreeRegionList local_cleanup_list("Local Cleanup List");
-    OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
-    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
     HRRSCleanupTask hrrs_cleanup_task;
     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
-                                           &old_proxy_set,
-                                           &humongous_proxy_set,
                                            &hrrs_cleanup_task);
     if (G1CollectedHeap::use_parallel_gc_threads()) {
       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
@@ -1900,13 +1905,10 @@
     assert(g1_note_end.complete(), "Shouldn't have yielded!");

     // Now update the lists
-    _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
-                                            NULL /* free_list */,
-                                            &old_proxy_set,
-                                            &humongous_proxy_set,
-                                            true /* par */);
+    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
     {
       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
       _max_live_bytes += g1_note_end.max_live_bytes();
       _freed_bytes += g1_note_end.freed_bytes();

@@ -1920,14 +1922,14 @@

       G1HRPrinter* hr_printer = _g1h->hr_printer();
       if (hr_printer->is_active()) {
-        HeapRegionLinkedListIterator iter(&local_cleanup_list);
+        FreeRegionListIterator iter(&local_cleanup_list);
         while (iter.more_available()) {
           HeapRegion* hr = iter.get_next();
           hr_printer->cleanup(hr);
         }
       }

-      _cleanup_list->add_as_tail(&local_cleanup_list);
+      _cleanup_list->add_ordered(&local_cleanup_list);
       assert(local_cleanup_list.is_empty(), "post-condition");

       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
@@ -1971,7 +1973,6 @@
     return;
   }

-  HRSPhaseSetter x(HRSPhaseCleanup);
   g1h->verify_region_sets_optional();

   if (VerifyDuringGC) {
@@ -2144,7 +2145,7 @@

   G1CollectedHeap* g1h = G1CollectedHeap::heap();

-  _cleanup_list.verify_optional();
+  _cleanup_list.verify_list();
   FreeRegionList tmp_free_list("Tmp Free List");

   if (G1ConcRegionFreeingVerbose) {
@@ -2157,9 +2158,9 @@
   // so it's not necessary to take any locks
   while (!_cleanup_list.is_empty()) {
     HeapRegion* hr = _cleanup_list.remove_head();
-    assert(hr != NULL, "the list was not empty");
+    assert(hr != NULL, "Got NULL from a non-empty list");
     hr->par_clear();
-    tmp_free_list.add_as_tail(hr);
+    tmp_free_list.add_ordered(hr);

     // Instead of adding one region at a time to the secondary_free_list,
     // we accumulate them in the local list and move them a few at a
@@ -2179,7 +2180,7 @@

       {
         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
-        g1h->secondary_free_list_add_as_tail(&tmp_free_list);
+        g1h->secondary_free_list_add(&tmp_free_list);
         SecondaryFreeList_lock->notify_all();
       }

@@ -2528,6 +2529,11 @@
     assert(!rp->discovery_enabled(), "Post condition");
   }

+  if (has_overflown()) {
+    // We can not trust g1_is_alive if the marking stack overflowed
+    return;
+  }
+
   g1h->unlink_string_and_symbol_table(&g1_is_alive,
                                       /* process_strings */ false, // currently strings are always roots
                                       /* process_symbols */ true);
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -25,7 +25,7 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP

-#include "gc_implementation/g1/heapRegionSets.hpp"
+#include "gc_implementation/g1/heapRegionSet.hpp"
 #include "utilities/taskqueue.hpp"

 class G1CollectedHeap;
--- a/src/share/vm/gc_implementation/g1/g1BiasedArray.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1BiasedArray.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -24,6 +24,14 @@

 #include "precompiled.hpp"
 #include "gc_implementation/g1/g1BiasedArray.hpp"
+#include "memory/padded.inline.hpp"
+
+// Allocate a new array, generic version.
+address G1BiasedMappedArrayBase::create_new_base_array(size_t length, size_t elem_size) {
+  assert(length > 0, "just checking");
+  assert(elem_size > 0, "just checking");
+  return PaddedPrimitiveArray<u_char, mtGC>::create_unfreeable(length * elem_size);
+}

 #ifndef PRODUCT
 void G1BiasedMappedArrayBase::verify_index(idx_t index) const {
--- a/src/share/vm/gc_implementation/g1/g1BiasedArray.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1BiasedArray.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -25,8 +25,8 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1BIASEDARRAY_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1BIASEDARRAY_HPP

+#include "memory/allocation.hpp"
 #include "utilities/debug.hpp"
-#include "memory/allocation.inline.hpp"

 // Implements the common base functionality for arrays that contain provisions
 // for accessing its elements using a biased index.
@@ -48,11 +48,7 @@
     _bias(0), _shift_by(0) { }

   // Allocate a new array, generic version.
-  static address create_new_base_array(size_t length, size_t elem_size) {
-    assert(length > 0, "just checking");
-    assert(elem_size > 0, "just checking");
-    return NEW_C_HEAP_ARRAY(u_char, length * elem_size, mtGC);
-  }
+  static address create_new_base_array(size_t length, size_t elem_size);

   // Initialize the members of this class. The biased start address of this array
   // is the bias (in elements) multiplied by the element size.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,317 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+
+#include "precompiled.hpp"
+#include "code/nmethod.hpp"
+#include "gc_implementation/g1/g1CodeCacheRemSet.hpp"
+#include "memory/iterator.hpp"
+
+G1CodeRootChunk::G1CodeRootChunk() : _top(NULL), _next(NULL), _prev(NULL) {
+  _top = bottom();
+}
+
+void G1CodeRootChunk::reset() {
+  _next = _prev = NULL;
+  _top = bottom();
+}
+
+void G1CodeRootChunk::nmethods_do(CodeBlobClosure* cl) {
+  nmethod** cur = bottom();
+  while (cur != _top) {
+    cl->do_code_blob(*cur);
+    cur++;
+  }
+}
+
+FreeList<G1CodeRootChunk> G1CodeRootSet::_free_list;
+size_t G1CodeRootSet::_num_chunks_handed_out = 0;
+
+G1CodeRootChunk* G1CodeRootSet::new_chunk() {
+  G1CodeRootChunk* result = _free_list.get_chunk_at_head();
+  if (result == NULL) {
+    result = new G1CodeRootChunk();
+  }
+  G1CodeRootSet::_num_chunks_handed_out++;
+  result->reset();
+  return result;
+}
+
+void G1CodeRootSet::free_chunk(G1CodeRootChunk* chunk) {
+  _free_list.return_chunk_at_head(chunk);
+  G1CodeRootSet::_num_chunks_handed_out--;
+}
+
+void G1CodeRootSet::free_all_chunks(FreeList<G1CodeRootChunk>* list) {
+  G1CodeRootSet::_num_chunks_handed_out -= list->count();
+  _free_list.prepend(list);
+}
+
+void G1CodeRootSet::purge_chunks(size_t keep_ratio) {
+  size_t keep = G1CodeRootSet::_num_chunks_handed_out * keep_ratio / 100;
+
+  if (keep >= (size_t)_free_list.count()) {
+    return;
+  }
+
+  FreeList<G1CodeRootChunk> temp;
+  temp.initialize();
+  temp.set_size(G1CodeRootChunk::word_size());
+
+  _free_list.getFirstNChunksFromList((size_t)_free_list.count() - keep, &temp);
+
+  G1CodeRootChunk* cur = temp.get_chunk_at_head();
+  while (cur != NULL) {
+    delete cur;
+    cur = temp.get_chunk_at_head();
+  }
+}
+
+size_t G1CodeRootSet::static_mem_size() {
+  return sizeof(_free_list) + sizeof(_num_chunks_handed_out);
+}
+
+size_t G1CodeRootSet::fl_mem_size() {
+  return _free_list.count() * _free_list.size();
+}
+
+void G1CodeRootSet::initialize() {
+  _free_list.initialize();
+  _free_list.set_size(G1CodeRootChunk::word_size());
+}
+
+G1CodeRootSet::G1CodeRootSet() : _list(), _length(0) {
+  _list.initialize();
+  _list.set_size(G1CodeRootChunk::word_size());
+}
+
+G1CodeRootSet::~G1CodeRootSet() {
+  clear();
+}
+
+void G1CodeRootSet::add(nmethod* method) {
+  if (!contains(method)) {
+    // Try to add the nmethod. If there is not enough space, get a new chunk.
+    if (_list.head() == NULL || _list.head()->is_full()) {
+      G1CodeRootChunk* cur = new_chunk();
+      _list.return_chunk_at_head(cur);
+    }
+    bool result = _list.head()->add(method);
+    guarantee(result, err_msg("Not able to add nmethod "PTR_FORMAT" to newly allocated chunk.", method));
+    _length++;
+  }
+}
+
+void G1CodeRootSet::remove(nmethod* method) {
+  G1CodeRootChunk* found = find(method);
+  if (found != NULL) {
+    bool result = found->remove(method);
+    guarantee(result, err_msg("could not find nmethod "PTR_FORMAT" during removal although we previously found it", method));
+    // eventually free completely emptied chunk
+    if (found->is_empty()) {
+      _list.remove_chunk(found);
+      free(found);
+    }
+    _length--;
+  }
+  assert(!contains(method), err_msg(PTR_FORMAT" still contains nmethod "PTR_FORMAT, this, method));
+}
+
+nmethod* G1CodeRootSet::pop() {
+  do {
+    G1CodeRootChunk* cur = _list.head();
+    if (cur == NULL) {
+      assert(_length == 0, "when there are no chunks, there should be no elements");
+      return NULL;
+    }
+    nmethod* result = cur->pop();
+    if (result != NULL) {
+      _length--;
+      return result;
+    } else {
+      free(_list.get_chunk_at_head());
+    }
+  } while (true);
+}
+
+G1CodeRootChunk* G1CodeRootSet::find(nmethod* method) {
+  G1CodeRootChunk* cur = _list.head();
+  while (cur != NULL) {
+    if (cur->contains(method)) {
+      return cur;
+    }
+    cur = (G1CodeRootChunk*)cur->next();
+  }
+  return NULL;
+}
+
+void G1CodeRootSet::free(G1CodeRootChunk* chunk) {
+  free_chunk(chunk);
+}
+
+bool G1CodeRootSet::contains(nmethod* method) {
+  return find(method) != NULL;
+}
+
+void G1CodeRootSet::clear() {
+  free_all_chunks(&_list);
+  _length = 0;
+}
+
+void G1CodeRootSet::nmethods_do(CodeBlobClosure* blk) const {
+  G1CodeRootChunk* cur = _list.head();
+  while (cur != NULL) {
+    cur->nmethods_do(blk);
+    cur = (G1CodeRootChunk*)cur->next();
+  }
+}
+
+size_t G1CodeRootSet::mem_size() {
+  return sizeof(this) + _list.count() * _list.size();
+}
+
+#ifndef PRODUCT
+
+void G1CodeRootSet::test() {
+  initialize();
+
+  assert(_free_list.count() == 0, "Free List must be empty");
+  assert(_num_chunks_handed_out == 0, "No elements must have been handed out yet");
+
+  // The number of chunks that we allocate for purge testing.
+  size_t const num_chunks = 10;
+  {
+    G1CodeRootSet set1;
+    assert(set1.is_empty(), "Code root set must be initially empty but is not.");
+
+    set1.add((nmethod*)1);
+    assert(_num_chunks_handed_out == 1,
+           err_msg("Must have allocated and handed out one chunk, but handed out "
+                   SIZE_FORMAT" chunks", _num_chunks_handed_out));
+    assert(set1.length() == 1, err_msg("Added exactly one element, but set contains "
+                                       SIZE_FORMAT" elements", set1.length()));
+
+    // G1CodeRootChunk::word_size() is larger than G1CodeRootChunk::num_entries which
+    // we cannot access.
+    for (uint i = 0; i < G1CodeRootChunk::word_size() + 1; i++) {
+      set1.add((nmethod*)1);
+    }
+    assert(_num_chunks_handed_out == 1,
+           err_msg("Duplicate detection must have prevented allocation of further "
+                   "chunks but contains "SIZE_FORMAT, _num_chunks_handed_out));
+    assert(set1.length() == 1,
+           err_msg("Duplicate detection should not have increased the set size but "
+                   "is "SIZE_FORMAT, set1.length()));
+
+    size_t num_total_after_add = G1CodeRootChunk::word_size() + 1;
+    for (size_t i = 0; i < num_total_after_add - 1; i++) {
+      set1.add((nmethod*)(2 + i));
+    }
+    assert(_num_chunks_handed_out > 1,
+           "After adding more code roots, more than one chunks should have been handed out");
+    assert(set1.length() == num_total_after_add,
+           err_msg("After adding in total "SIZE_FORMAT" distinct code roots, they "
+                   "need to be in the set, but there are only "SIZE_FORMAT,
+                   num_total_after_add, set1.length()));
+
+    size_t num_popped = 0;
+    while (set1.pop() != NULL) {
+      num_popped++;
+    }
+    assert(num_popped == num_total_after_add,
+           err_msg("Managed to pop "SIZE_FORMAT" code roots, but only "SIZE_FORMAT" "
+                   "were added", num_popped, num_total_after_add));
+    assert(_num_chunks_handed_out == 0,
+           err_msg("After popping all elements, all chunks must have been returned "
+                   "but are still "SIZE_FORMAT, _num_chunks_handed_out));
+
+    purge_chunks(0);
+    assert(_free_list.count() == 0,
+           err_msg("After purging everything, the free list must be empty but still "
+                   "contains "SIZE_FORMAT" chunks", _free_list.count()));
+
+    // Add some more handed out chunks.
+    size_t i = 0;
+    while (_num_chunks_handed_out < num_chunks) {
+      set1.add((nmethod*)i);
+      i++;
+    }
+
+    {
+      // Generate chunks on the free list.
+      G1CodeRootSet set2;
+      size_t i = 0;
+      while (_num_chunks_handed_out < num_chunks * 2) {
+        set2.add((nmethod*)i);
+        i++;
+      }
+      // Exit of the scope of the set2 object will call the destructor that generates
+      // num_chunks elements on the free list.
+    }
+
+    assert(_num_chunks_handed_out == num_chunks,
+           err_msg("Deletion of the second set must have resulted in giving back "
+                   "those, but there is still "SIZE_FORMAT" handed out, expecting "
+                   SIZE_FORMAT, _num_chunks_handed_out, num_chunks));
+    assert((size_t)_free_list.count() == num_chunks,
+           err_msg("After freeing "SIZE_FORMAT" chunks, they must be on the free list "
+                   "but there are only "SIZE_FORMAT, num_chunks, _free_list.count()));
+
+    size_t const test_percentage = 50;
+    purge_chunks(test_percentage);
+    assert(_num_chunks_handed_out == num_chunks,
+           err_msg("Purging must not hand out chunks but there are "SIZE_FORMAT,
+                   _num_chunks_handed_out));
+    assert((size_t)_free_list.count() == (ssize_t)(num_chunks * test_percentage / 100),
+           err_msg("Must have purged "SIZE_FORMAT" percent of "SIZE_FORMAT" chunks"
+                   "but there are "SSIZE_FORMAT, test_percentage, num_chunks,
+                   _free_list.count()));
+    // Purge the remainder of the chunks on the free list.
+    purge_chunks(0);
+    assert(_free_list.count() == 0, "Free List must be empty");
+    assert(_num_chunks_handed_out == num_chunks,
+           err_msg("Expected to be "SIZE_FORMAT" chunks handed out from the first set "
+                   "but there are "SIZE_FORMAT, num_chunks, _num_chunks_handed_out));
+
+    // Exit of the scope of the set1 object will call the destructor that generates
+    // num_chunks additional elements on the free list.
+  }
+
+  assert(_num_chunks_handed_out == 0,
+         err_msg("Deletion of the only set must have resulted in no chunks handed "
+                 "out, but there is still "SIZE_FORMAT" handed out", _num_chunks_handed_out));
+  assert((size_t)_free_list.count() == num_chunks,
+         err_msg("After freeing "SIZE_FORMAT" chunks, they must be on the free list "
+                 "but there are only "SSIZE_FORMAT, num_chunks, _free_list.count()));
+
+  // Restore initial state.
+  purge_chunks(0);
+  assert(_free_list.count() == 0, "Free List must be empty");
+  assert(_num_chunks_handed_out == 0, "No elements must have been handed out yet");
+}
+
+void TestCodeCacheRemSet_test() {
+  G1CodeRootSet::test();
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1CODECACHEREMSET_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1CODECACHEREMSET_HPP
+
+#include "memory/allocation.hpp"
+#include "memory/freeList.hpp"
+#include "runtime/globals.hpp"
+
+class CodeBlobClosure;
+
+class G1CodeRootChunk : public CHeapObj<mtGC> {
+ private:
+  static const int NUM_ENTRIES = 32;
+ public:
+  G1CodeRootChunk*     _next;
+  G1CodeRootChunk*     _prev;
+
+  nmethod** _top;
+
+  nmethod* _data[NUM_ENTRIES];
+
+  nmethod** bottom() const {
+    return (nmethod**) &(_data[0]);
+  }
+
+  nmethod** end() const {
+    return (nmethod**) &(_data[NUM_ENTRIES]);
+  }
+
+ public:
+  G1CodeRootChunk();
+  ~G1CodeRootChunk() {}
+
+  static size_t word_size() { return (size_t)(align_size_up_(sizeof(G1CodeRootChunk), HeapWordSize) / HeapWordSize); }
+
+  // FreeList "interface" methods
+
+  G1CodeRootChunk* next() const         { return _next; }
+  G1CodeRootChunk* prev() const         { return _prev; }
+  void set_next(G1CodeRootChunk* v)     { _next = v; assert(v != this, "Boom");}
+  void set_prev(G1CodeRootChunk* v)     { _prev = v; assert(v != this, "Boom");}
+  void clear_next()       { set_next(NULL); }
+  void clear_prev()       { set_prev(NULL); }
+
+  size_t size() const { return word_size(); }
+
+  void link_next(G1CodeRootChunk* ptr)  { set_next(ptr); }
+  void link_prev(G1CodeRootChunk* ptr)  { set_prev(ptr); }
+  void link_after(G1CodeRootChunk* ptr) {
+    link_next(ptr);
+    if (ptr != NULL) ptr->link_prev((G1CodeRootChunk*)this);
+  }
+
+  bool is_free()                 { return true; }
+
+  // New G1CodeRootChunk routines
+
+  void reset();
+
+  bool is_empty() const {
+    return _top == bottom();
+  }
+
+  bool is_full() const {
+    return _top == (nmethod**)end();
+  }
+
+  bool contains(nmethod* method) {
+    nmethod** cur = bottom();
+    while (cur != _top) {
+      if (*cur == method) return true;
+      cur++;
+    }
+    return false;
+  }
+
+  bool add(nmethod* method) {
+    if (is_full()) return false;
+    *_top = method;
+    _top++;
+    return true;
+  }
+
+  bool remove(nmethod* method) {
+    nmethod** cur = bottom();
+    while (cur != _top) {
+      if (*cur == method) {
+        memmove(cur, cur + 1, (_top - (cur + 1)) * sizeof(nmethod**));
+        _top--;
+        return true;
+      }
+      cur++;
+    }
+    return false;
+  }
+
+  void nmethods_do(CodeBlobClosure* blk);
+
+  nmethod* pop() {
+    if (is_empty()) {
+      return NULL;
+    }
+    _top--;
+    return *_top;
+  }
+};
+
+// Implements storage for a set of code roots.
+// All methods that modify the set are not thread-safe except if otherwise noted.
+class G1CodeRootSet VALUE_OBJ_CLASS_SPEC {
+ private:
+  // Global free chunk list management
+  static FreeList<G1CodeRootChunk> _free_list;
+  // Total number of chunks handed out
+  static size_t _num_chunks_handed_out;
+
+  static G1CodeRootChunk* new_chunk();
+  static void free_chunk(G1CodeRootChunk* chunk);
+  // Free all elements of the given list.
+  static void free_all_chunks(FreeList<G1CodeRootChunk>* list);
+
+  // Return the chunk that contains the given nmethod, NULL otherwise.
+  // Scans the list of chunks backwards, as this method is used to add new
+  // entries, which are typically added in bulk for a single nmethod.
+  G1CodeRootChunk* find(nmethod* method);
+  void free(G1CodeRootChunk* chunk);
+
+  size_t _length;
+  FreeList<G1CodeRootChunk> _list;
+
+ public:
+  G1CodeRootSet();
+  ~G1CodeRootSet();
+
+  static void initialize();
+  static void purge_chunks(size_t keep_ratio);
+
+  static size_t static_mem_size();
+  static size_t fl_mem_size();
+
+  // Search for the code blob from the recently allocated ones to find duplicates more quickly, as this
+  // method is likely to be repeatedly called with the same nmethod.
+  void add(nmethod* method);
+
+  void remove(nmethod* method);
+  nmethod* pop();
+
+  bool contains(nmethod* method);
+
+  void clear();
+
+  void nmethods_do(CodeBlobClosure* blk) const;
+
+  bool is_empty() { return length() == 0; }
+
+  // Length in elements
+  size_t length() const { return _length; }
+
+  // Memory size in bytes taken by this set.
+  size_t mem_size();
+
+  static void test() PRODUCT_RETURN;
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1CODECACHEREMSET_HPP
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -39,6 +39,7 @@
 #include "gc_implementation/g1/g1MarkSweep.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
+#include "gc_implementation/g1/g1StringDedup.hpp"
 #include "gc_implementation/g1/g1YCTypes.hpp"
 #include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
@@ -169,14 +170,6 @@
   int calls() { return _calls; }
 };

-class RedirtyLoggedCardTableEntryFastClosure : public CardTableEntryClosure {
-public:
-  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
-    *card_ptr = CardTableModRefBS::dirty_card_val();
-    return true;
-  }
-};
-
 YoungList::YoungList(G1CollectedHeap* g1h) :
     _g1h(g1h), _head(NULL), _length(0), _last_sampled_rs_lengths(0),
     _survivor_head(NULL), _survivor_tail(NULL), _survivor_length(0) {
@@ -524,7 +517,7 @@
 // Private methods.

 HeapRegion*
-G1CollectedHeap::new_region_try_secondary_free_list() {
+G1CollectedHeap::new_region_try_secondary_free_list(bool is_old) {
   MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
   while (!_secondary_free_list.is_empty() || free_regions_coming()) {
     if (!_secondary_free_list.is_empty()) {
@@ -540,7 +533,7 @@

       assert(!_free_list.is_empty(), "if the secondary_free_list was not "
              "empty we should have moved at least one entry to the free_list");
-      HeapRegion* res = _free_list.remove_head();
+      HeapRegion* res = _free_list.remove_region(is_old);
       if (G1ConcRegionFreeingVerbose) {
         gclog_or_tty->print_cr("G1ConcRegionFreeing [region alloc] : "
                                "allocated "HR_FORMAT" from secondary_free_list",
@@ -562,7 +555,7 @@
   return NULL;
 }

-HeapRegion* G1CollectedHeap::new_region(size_t word_size, bool do_expand) {
+HeapRegion* G1CollectedHeap::new_region(size_t word_size, bool is_old, bool do_expand) {
   assert(!isHumongous(word_size) || word_size <= HeapRegion::GrainWords,
          "the only time we use this to allocate a humongous region is "
          "when we are allocating a single humongous region");
@@ -574,19 +567,21 @@
         gclog_or_tty->print_cr("G1ConcRegionFreeing [region alloc] : "
                                "forced to look at the secondary_free_list");
       }
-      res = new_region_try_secondary_free_list();
+      res = new_region_try_secondary_free_list(is_old);
       if (res != NULL) {
         return res;
       }
     }
   }
-  res = _free_list.remove_head_or_null();
+
+  res = _free_list.remove_region(is_old);
+
   if (res == NULL) {
     if (G1ConcRegionFreeingVerbose) {
       gclog_or_tty->print_cr("G1ConcRegionFreeing [region alloc] : "
                              "res == NULL, trying the secondary_free_list");
     }
-    res = new_region_try_secondary_free_list();
+    res = new_region_try_secondary_free_list(is_old);
   }
   if (res == NULL && do_expand && _expand_heap_after_alloc_failure) {
     // Currently, only attempts to allocate GC alloc regions set
@@ -603,12 +598,9 @@
     if (expand(word_size * HeapWordSize)) {
       // Given that expand() succeeded in expanding the heap, and we
       // always expand the heap by an amount aligned to the heap
-      // region size, the free list should in theory not be empty. So
-      // it would probably be OK to use remove_head(). But the extra
-      // check for NULL is unlikely to be a performance issue here (we
-      // just expanded the heap!) so let's just be conservative and
-      // use remove_head_or_null().
-      res = _free_list.remove_head_or_null();
+      // region size, the free list should in theory not be empty.
+      // In either case remove_region() will check for NULL.
+      res = _free_list.remove_region(is_old);
     } else {
       _expand_heap_after_alloc_failure = false;
     }
@@ -626,7 +618,7 @@
     // Only one region to allocate, no need to go through the slower
     // path. The caller will attempt the expansion if this fails, so
     // let's not try to expand here too.
-    HeapRegion* hr = new_region(word_size, false /* do_expand */);
+    HeapRegion* hr = new_region(word_size, true /* is_old */, false /* do_expand */);
     if (hr != NULL) {
       first = hr->hrs_index();
     } else {
@@ -1298,7 +1290,6 @@

   size_t metadata_prev_used = MetaspaceAux::allocated_used_bytes();

-  HRSPhaseSetter x(HRSPhaseFullGC);
   verify_region_sets_optional();

   const bool do_clear_all_soft_refs = clear_all_soft_refs ||
@@ -1928,10 +1919,10 @@
   _g1mm(NULL),
   _refine_cte_cl(NULL),
   _full_collection(false),
-  _free_list("Master Free List"),
-  _secondary_free_list("Secondary Free List"),
-  _old_set("Old Set"),
-  _humongous_set("Master Humongous Set"),
+  _free_list("Master Free List", new MasterFreeRegionListMtSafeChecker()),
+  _secondary_free_list("Secondary Free List", new SecondaryFreeRegionListMtSafeChecker()),
+  _old_set("Old Set", false /* humongous */, new OldRegionSetMtSafeChecker()),
+  _humongous_set("Master Humongous Set", true /* humongous */, new HumongousRegionSetMtSafeChecker()),
   _free_regions_coming(false),
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
@@ -1963,7 +1954,7 @@
   int n_queues = MAX2((int)ParallelGCThreads, 1);
   _task_queues = new RefToScanQueueSet(n_queues);

-  int n_rem_sets = HeapRegionRemSet::num_par_rem_sets();
+  uint n_rem_sets = HeapRegionRemSet::num_par_rem_sets();
   assert(n_rem_sets > 0, "Invariant.");

   _worker_cset_start_region = NEW_C_HEAP_ARRAY(HeapRegion*, n_queues, mtGC);
@@ -2079,7 +2070,7 @@
   guarantee(HeapRegion::CardsPerRegion < max_cards_per_region,
             "too many cards per region");

-  HeapRegionSet::set_unrealistically_long_length(max_regions() + 1);
+  FreeRegionList::set_unrealistically_long_length(max_regions() + 1);

   _bot_shared = new G1BlockOffsetSharedArray(_reserved,
                                              heap_word_size(init_byte_size));
@@ -2182,6 +2173,8 @@
   // values in the heap have been properly initialized.
   _g1mm = new G1MonitoringSupport(this);

+  G1StringDedup::initialize();
+
   return JNI_OK;
 }

@@ -2266,7 +2259,7 @@
                                 // (for efficiency/performance)
                            false);
                                 // Setting next fields of discovered
-                                // lists requires a barrier.
+                                // lists does not require a barrier.
 }

 size_t G1CollectedHeap::capacity() const {
@@ -2369,8 +2362,12 @@
 };

 size_t G1CollectedHeap::recalculate_used() const {
+  double recalculate_used_start = os::elapsedTime();
+
   SumUsedClosure blk;
   heap_region_iterate(&blk);
+
+  g1_policy()->phase_times()->record_evac_fail_recalc_used_time((os::elapsedTime() - recalculate_used_start) * 1000.0);
   return blk.result();
 }

@@ -3013,7 +3010,17 @@
 }

 size_t G1CollectedHeap::tlab_capacity(Thread* ignored) const {
-  return HeapRegion::GrainBytes;
+  return (_g1_policy->young_list_target_length() - young_list()->survivor_length()) * HeapRegion::GrainBytes;
+}
+
+size_t G1CollectedHeap::tlab_used(Thread* ignored) const {
+  return young_list()->eden_used_bytes();
+}
+
+// For G1 TLABs should not contain humongous objects, so the maximum TLAB size
+// must be smaller than the humongous object limit.
+size_t G1CollectedHeap::max_tlab_size() const {
+  return align_size_down(_humongous_object_threshold_in_words - 1, MinObjAlignment);
 }

 size_t G1CollectedHeap::unsafe_max_tlab_alloc(Thread* ignored) const {
@@ -3025,11 +3032,11 @@
   // humongous objects.

   HeapRegion* hr = _mutator_alloc_region.get();
-  size_t max_tlab_size = _humongous_object_threshold_in_words * wordSize;
+  size_t max_tlab = max_tlab_size() * wordSize;
   if (hr == NULL) {
-    return max_tlab_size;
+    return max_tlab;
   } else {
-    return MIN2(MAX2(hr->free(), (size_t) MinTLABSize), max_tlab_size);
+    return MIN2(MAX2(hr->free(), (size_t) MinTLABSize), max_tlab);
   }
 }

@@ -3470,6 +3477,11 @@
     if (!silent) gclog_or_tty->print("RemSet ");
     rem_set()->verify();

+    if (G1StringDedup::is_enabled()) {
+      if (!silent) gclog_or_tty->print("StrDedup ");
+      G1StringDedup::verify();
+    }
+
     if (failures) {
       gclog_or_tty->print_cr("Heap:");
       // It helps to have the per-region information in the output to
@@ -3487,8 +3499,13 @@
     }
     guarantee(!failures, "there should not have been any failures");
   } else {
-    if (!silent)
-      gclog_or_tty->print("(SKIPPING roots, heapRegionSets, heapRegions, remset) ");
+    if (!silent) {
+      gclog_or_tty->print("(SKIPPING Roots, HeapRegionSets, HeapRegions, RemSet");
+      if (G1StringDedup::is_enabled()) {
+        gclog_or_tty->print(", StrDedup");
+      }
+      gclog_or_tty->print(") ");
+    }
   }
 }

@@ -3581,6 +3598,9 @@
   st->cr();
   _cm->print_worker_threads_on(st);
   _cg1r->print_worker_threads_on(st);
+  if (G1StringDedup::is_enabled()) {
+    G1StringDedup::print_worker_threads_on(st);
+  }
 }

 void G1CollectedHeap::gc_threads_do(ThreadClosure* tc) const {
@@ -3589,6 +3609,9 @@
   }
   tc->do_thread(_cmThread);
   _cg1r->threads_do(tc);
+  if (G1StringDedup::is_enabled()) {
+    G1StringDedup::threads_do(tc);
+  }
 }

 void G1CollectedHeap::print_tracing_info() const {
@@ -3668,6 +3691,7 @@
   // always_do_update_barrier = false;
   assert(InlineCacheBuffer::is_empty(), "should have cleaned up ICBuffer");
   // Fill TLAB's and such
+  accumulate_statistics_all_tlabs();
   ensure_parsability(true);

   if (G1SummarizeRSetStats && (G1SummarizeRSetStatsPeriod > 0) &&
@@ -3692,6 +3716,8 @@
                         "derived pointer present"));
   // always_do_update_barrier = true;

+  resize_all_tlabs();
+
   // We have just completed a GC. Update the soft reference
   // policy with the new heap occupancy
   Universe::update_heap_info_at_gc();
@@ -3892,7 +3918,6 @@
   print_heap_before_gc();
   trace_heap_before_gc(_gc_tracer_stw);

-  HRSPhaseSetter x(HRSPhaseEvacuation);
   verify_region_sets_optional();
   verify_dirty_young_regions();

@@ -4391,6 +4416,8 @@
 void G1CollectedHeap::remove_self_forwarding_pointers() {
   assert(check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");

+  double remove_self_forwards_start = os::elapsedTime();
+
   G1ParRemoveSelfForwardPtrsTask rsfp_task(this);

   if (G1CollectedHeap::use_parallel_gc_threads()) {
@@ -4418,6 +4445,8 @@
   }
   _objs_with_preserved_marks.clear(true);
   _preserved_marks_of_objs.clear(true);
+
+  g1_policy()->phase_times()->record_evac_fail_remove_self_forwards((os::elapsedTime() - remove_self_forwards_start) * 1000.0);
 }

 void G1CollectedHeap::push_on_evac_failure_scan_stack(oop obj) {
@@ -4639,9 +4668,7 @@
 #endif // ASSERT

 void G1ParScanThreadState::trim_queue() {
-  assert(_evac_cl != NULL, "not set");
   assert(_evac_failure_cl != NULL, "not set");
-  assert(_partial_scan_cl != NULL, "not set");

   StarTask ref;
   do {
@@ -4732,6 +4759,12 @@
   oop forward_ptr = old->forward_to_atomic(obj);
   if (forward_ptr == NULL) {
     Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);
+
+    // alloc_purpose is just a hint to allocate() above, recheck the type of region
+    // we actually allocated from and update alloc_purpose accordingly
+    HeapRegion* to_region = _g1h->heap_region_containing_raw(obj_ptr);
+    alloc_purpose = to_region->is_young() ? GCAllocForSurvived : GCAllocForTenured;
+
     if (g1p->track_object_age(alloc_purpose)) {
       // We could simply do obj->incr_age(). However, this causes a
       // performance issue. obj->incr_age() will first check whether
@@ -4759,6 +4792,13 @@
       obj->set_mark(m);
     }

+    if (G1StringDedup::is_enabled()) {
+      G1StringDedup::enqueue_from_evacuation(from_region->is_young(),
+                                             to_region->is_young(),
+                                             queue_num(),
+                                             obj);
+    }
+
     size_t* surv_young_words = surviving_young_words();
     surv_young_words[young_index] += word_sz;

@@ -4837,55 +4877,6 @@
 template void G1ParCopyClosure<G1BarrierEvac, false>::do_oop_work(oop* p);
 template void G1ParCopyClosure<G1BarrierEvac, false>::do_oop_work(narrowOop* p);

-template <class T> void G1ParScanPartialArrayClosure::do_oop_nv(T* p) {
-  assert(has_partial_array_mask(p), "invariant");
-  oop from_obj = clear_partial_array_mask(p);
-
-  assert(Universe::heap()->is_in_reserved(from_obj), "must be in heap.");
-  assert(from_obj->is_objArray(), "must be obj array");
-  objArrayOop from_obj_array = objArrayOop(from_obj);
-  // The from-space object contains the real length.
-  int length                 = from_obj_array->length();
-
-  assert(from_obj->is_forwarded(), "must be forwarded");
-  oop to_obj                 = from_obj->forwardee();
-  assert(from_obj != to_obj, "should not be chunking self-forwarded objects");
-  objArrayOop to_obj_array   = objArrayOop(to_obj);
-  // We keep track of the next start index in the length field of the
-  // to-space object.
-  int next_index             = to_obj_array->length();
-  assert(0 <= next_index && next_index < length,
-         err_msg("invariant, next index: %d, length: %d", next_index, length));
-
-  int start                  = next_index;
-  int end                    = length;
-  int remainder              = end - start;
-  // We'll try not to push a range that's smaller than ParGCArrayScanChunk.
-  if (remainder > 2 * ParGCArrayScanChunk) {
-    end = start + ParGCArrayScanChunk;
-    to_obj_array->set_length(end);
-    // Push the remainder before we process the range in case another
-    // worker has run out of things to do and can steal it.
-    oop* from_obj_p = set_partial_array_mask(from_obj);
-    _par_scan_state->push_on_queue(from_obj_p);
-  } else {
-    assert(length == end, "sanity");
-    // We'll process the final range for this object. Restore the length
-    // so that the heap remains parsable in case of evacuation failure.
-    to_obj_array->set_length(end);
-  }
-  _scanner.set_region(_g1->heap_region_containing_raw(to_obj));
-  // Process indexes [start,end). It will also process the header
-  // along with the first chunk (i.e., the chunk with start == 0).
-  // Note that at this point the length field of to_obj_array is not
-  // correct given that we are using it to keep track of the next
-  // start index. oop_iterate_range() (thankfully!) ignores the length
-  // field and only relies on the start / end parameters.  It does
-  // however return the size of the object which will be incorrect. So
-  // we have to ignore it even if we wanted to use it.
-  to_obj_array->oop_iterate_range(&_scanner, start, end);
-}
-
 class G1ParEvacuateFollowersClosure : public VoidClosure {
 protected:
   G1CollectedHeap*              _g1h;
@@ -5027,13 +5018,9 @@
       ReferenceProcessor*             rp = _g1h->ref_processor_stw();

       G1ParScanThreadState            pss(_g1h, worker_id, rp);
-      G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, rp);
       G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp);
-      G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, rp);
-
-      pss.set_evac_closure(&scan_evac_cl);
+
       pss.set_evac_failure_closure(&evac_failure_cl);
-      pss.set_partial_scan_closure(&partial_scan_cl);

       G1ParScanExtRootClosure        only_scan_root_cl(_g1h, &pss, rp);
       G1ParScanMetadataClosure       only_scan_metadata_cl(_g1h, &pss, rp);
@@ -5287,6 +5274,33 @@
                            g1_unlink_task.strings_processed(), g1_unlink_task.strings_removed(),
                            g1_unlink_task.symbols_processed(), g1_unlink_task.symbols_removed());
   }
+
+  if (G1StringDedup::is_enabled()) {
+    G1StringDedup::unlink(is_alive);
+  }
+}
+
+class RedirtyLoggedCardTableEntryFastClosure : public CardTableEntryClosure {
+public:
+  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
+    *card_ptr = CardTableModRefBS::dirty_card_val();
+    return true;
+  }
+};
+
+void G1CollectedHeap::redirty_logged_cards() {
+  guarantee(G1DeferredRSUpdate, "Must only be called when using deferred RS updates.");
+  double redirty_logged_cards_start = os::elapsedTime();
+
+  RedirtyLoggedCardTableEntryFastClosure redirty;
+  dirty_card_queue_set().set_closure(&redirty);
+  dirty_card_queue_set().apply_closure_to_all_completed_buffers();
+
+  DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set();
+  dcq.merge_bufferlists(&dirty_card_queue_set());
+  assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
+
+  g1_policy()->phase_times()->record_redirty_logged_cards_time_ms((os::elapsedTime() - redirty_logged_cards_start) * 1000.0);
 }

 // Weak Reference Processing support
@@ -5470,14 +5484,9 @@
     G1STWIsAliveClosure is_alive(_g1h);

     G1ParScanThreadState            pss(_g1h, worker_id, NULL);
-
-    G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, NULL);
     G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, NULL);
-    G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, NULL);
-
-    pss.set_evac_closure(&scan_evac_cl);
+
     pss.set_evac_failure_closure(&evac_failure_cl);
-    pss.set_partial_scan_closure(&partial_scan_cl);

     G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, &pss, NULL);
     G1ParScanMetadataClosure       only_copy_metadata_cl(_g1h, &pss, NULL);
@@ -5582,13 +5591,9 @@
     HandleMark   hm;

     G1ParScanThreadState            pss(_g1h, worker_id, NULL);
-    G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, NULL);
     G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, NULL);
-    G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, NULL);
-
-    pss.set_evac_closure(&scan_evac_cl);
+
     pss.set_evac_failure_closure(&evac_failure_cl);
-    pss.set_partial_scan_closure(&partial_scan_cl);

     assert(pss.refs()->is_empty(), "both queue and overflow should be empty");

@@ -5712,13 +5717,9 @@
   // We do not embed a reference processor in the copying/scanning
   // closures while we're actually processing the discovered
   // reference objects.
-  G1ParScanHeapEvacClosure        scan_evac_cl(this, &pss, NULL);
   G1ParScanHeapEvacFailureClosure evac_failure_cl(this, &pss, NULL);
-  G1ParScanPartialArrayClosure    partial_scan_cl(this, &pss, NULL);
-
-  pss.set_evac_closure(&scan_evac_cl);
+
   pss.set_evac_failure_closure(&evac_failure_cl);
-  pss.set_partial_scan_closure(&partial_scan_cl);

   assert(pss.refs()->is_empty(), "pre-condition");

@@ -5900,6 +5901,9 @@
     G1STWIsAliveClosure is_alive(this);
     G1KeepAliveClosure keep_alive(this);
     JNIHandles::weak_oops_do(&is_alive, &keep_alive);
+    if (G1StringDedup::is_enabled()) {
+      G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive);
+    }
   }

   release_gc_alloc_regions(n_workers, evacuation_info);
@@ -5917,6 +5921,8 @@
   // strong code roots for a particular heap region.
   migrate_strong_code_roots();

+  purge_code_root_memory();
+
   if (g1_policy()->during_initial_mark_pause()) {
     // Reset the claim values set during marking the strong code roots
     reset_heap_region_claim_values();
@@ -5943,41 +5949,15 @@
   enqueue_discovered_references(n_workers);

   if (G1DeferredRSUpdate) {
-    RedirtyLoggedCardTableEntryFastClosure redirty;
-    dirty_card_queue_set().set_closure(&redirty);
-    dirty_card_queue_set().apply_closure_to_all_completed_buffers();
-
-    DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set();
-    dcq.merge_bufferlists(&dirty_card_queue_set());
-    assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
+    redirty_logged_cards();
   }
   COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
 }

-void G1CollectedHeap::free_region_if_empty(HeapRegion* hr,
-                                     size_t* pre_used,
-                                     FreeRegionList* free_list,
-                                     OldRegionSet* old_proxy_set,
-                                     HumongousRegionSet* humongous_proxy_set,
-                                     HRRSCleanupTask* hrrs_cleanup_task,
-                                     bool par) {
-  if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
-    if (hr->isHumongous()) {
-      assert(hr->startsHumongous(), "we should only see starts humongous");
-      free_humongous_region(hr, pre_used, free_list, humongous_proxy_set, par);
-    } else {
-      _old_set.remove_with_proxy(hr, old_proxy_set);
-      free_region(hr, pre_used, free_list, par);
-    }
-  } else {
-    hr->rem_set()->do_cleanup_work(hrrs_cleanup_task);
-  }
-}
-
 void G1CollectedHeap::free_region(HeapRegion* hr,
-                                  size_t* pre_used,
                                   FreeRegionList* free_list,
-                                  bool par) {
+                                  bool par,
+                                  bool locked) {
   assert(!hr->isHumongous(), "this is only for non-humongous regions");
   assert(!hr->is_empty(), "the region should not be empty");
   assert(free_list != NULL, "pre-condition");
@@ -5988,70 +5968,56 @@
   if (!hr->is_young()) {
     _cg1r->hot_card_cache()->reset_card_counts(hr);
   }
-  *pre_used += hr->used();
-  hr->hr_clear(par, true /* clear_space */);
-  free_list->add_as_head(hr);
+  hr->hr_clear(par, true /* clear_space */, locked /* locked */);
+  free_list->add_ordered(hr);
 }

 void G1CollectedHeap::free_humongous_region(HeapRegion* hr,
-                                     size_t* pre_used,
                                      FreeRegionList* free_list,
-                                     HumongousRegionSet* humongous_proxy_set,
                                      bool par) {
   assert(hr->startsHumongous(), "this is only for starts humongous regions");
   assert(free_list != NULL, "pre-condition");
-  assert(humongous_proxy_set != NULL, "pre-condition");
-
-  size_t hr_used = hr->used();
+
   size_t hr_capacity = hr->capacity();
-  size_t hr_pre_used = 0;
-  _humongous_set.remove_with_proxy(hr, humongous_proxy_set);
   // We need to read this before we make the region non-humongous,
   // otherwise the information will be gone.
   uint last_index = hr->last_hc_index();
   hr->set_notHumongous();
-  free_region(hr, &hr_pre_used, free_list, par);
+  free_region(hr, free_list, par);

   uint i = hr->hrs_index() + 1;
   while (i < last_index) {
     HeapRegion* curr_hr = region_at(i);
     assert(curr_hr->continuesHumongous(), "invariant");
     curr_hr->set_notHumongous();
-    free_region(curr_hr, &hr_pre_used, free_list, par);
+    free_region(curr_hr, free_list, par);
     i += 1;
   }
-  assert(hr_pre_used == hr_used,
-         err_msg("hr_pre_used: "SIZE_FORMAT" and hr_used: "SIZE_FORMAT" "
-                 "should be the same", hr_pre_used, hr_used));
-  *pre_used += hr_pre_used;
-}
-
-void G1CollectedHeap::update_sets_after_freeing_regions(size_t pre_used,
-                                       FreeRegionList* free_list,
-                                       OldRegionSet* old_proxy_set,
-                                       HumongousRegionSet* humongous_proxy_set,
-                                       bool par) {
-  if (pre_used > 0) {
-    Mutex* lock = (par) ? ParGCRareEvent_lock : NULL;
-    MutexLockerEx x(lock, Mutex::_no_safepoint_check_flag);
-    assert(_summary_bytes_used >= pre_used,
-           err_msg("invariant: _summary_bytes_used: "SIZE_FORMAT" "
-                   "should be >= pre_used: "SIZE_FORMAT,
-                   _summary_bytes_used, pre_used));
-    _summary_bytes_used -= pre_used;
-  }
-  if (free_list != NULL && !free_list->is_empty()) {
+}
+
+void G1CollectedHeap::remove_from_old_sets(const HeapRegionSetCount& old_regions_removed,
+                                       const HeapRegionSetCount& humongous_regions_removed) {
+  if (old_regions_removed.length() > 0 || humongous_regions_removed.length() > 0) {
+    MutexLockerEx x(OldSets_lock, Mutex::_no_safepoint_check_flag);
+    _old_set.bulk_remove(old_regions_removed);
+    _humongous_set.bulk_remove(humongous_regions_removed);
+  }
+
+}
+
+void G1CollectedHeap::prepend_to_freelist(FreeRegionList* list) {
+  assert(list != NULL, "list can't be null");
+  if (!list->is_empty()) {
     MutexLockerEx x(FreeList_lock, Mutex::_no_safepoint_check_flag);
-    _free_list.add_as_head(free_list);
-  }
-  if (old_proxy_set != NULL && !old_proxy_set->is_empty()) {
-    MutexLockerEx x(OldSets_lock, Mutex::_no_safepoint_check_flag);
-    _old_set.update_from_proxy(old_proxy_set);
-  }
-  if (humongous_proxy_set != NULL && !humongous_proxy_set->is_empty()) {
-    MutexLockerEx x(OldSets_lock, Mutex::_no_safepoint_check_flag);
-    _humongous_set.update_from_proxy(humongous_proxy_set);
-  }
+    _free_list.add_ordered(list);
+  }
+}
+
+void G1CollectedHeap::decrement_summary_bytes(size_t bytes) {
+  assert(_summary_bytes_used >= bytes,
+         err_msg("invariant: _summary_bytes_used: "SIZE_FORMAT" should be >= bytes: "SIZE_FORMAT,
+                  _summary_bytes_used, bytes));
+  _summary_bytes_used -= bytes;
 }

 class G1ParCleanupCTTask : public AbstractGangTask {
@@ -6211,7 +6177,7 @@
       }
     }

-    rs_lengths += cur->rem_set()->occupied();
+    rs_lengths += cur->rem_set()->occupied_locked();

     HeapRegion* next = cur->next_in_collection_set();
     assert(cur->in_collection_set(), "bad CS");
@@ -6244,7 +6210,8 @@

       // And the region is empty.
       assert(!used_mr.is_empty(), "Should not have empty regions in a CS.");
-      free_region(cur, &pre_used, &local_free_list, false /* par */);
+      pre_used += cur->used();
+      free_region(cur, &local_free_list, false /* par */, true /* locked */);
     } else {
       cur->uninstall_surv_rate_group();
       if (cur->is_young()) {
@@ -6272,10 +6239,8 @@
     young_time_ms += elapsed_ms;
   }

-  update_sets_after_freeing_regions(pre_used, &local_free_list,
-                                    NULL /* old_proxy_set */,
-                                    NULL /* humongous_proxy_set */,
-                                    false /* par */);
+  prepend_to_freelist(&local_free_list);
+  decrement_summary_bytes(pre_used);
   policy->phase_times()->record_young_free_cset_time_ms(young_time_ms);
   policy->phase_times()->record_non_young_free_cset_time_ms(non_young_time_ms);
 }
@@ -6387,10 +6352,10 @@

 class TearDownRegionSetsClosure : public HeapRegionClosure {
 private:
-  OldRegionSet *_old_set;
+  HeapRegionSet *_old_set;

 public:
-  TearDownRegionSetsClosure(OldRegionSet* old_set) : _old_set(old_set) { }
+  TearDownRegionSetsClosure(HeapRegionSet* old_set) : _old_set(old_set) { }

   bool doHeapRegion(HeapRegion* r) {
     if (r->is_empty()) {
@@ -6419,9 +6384,10 @@
     TearDownRegionSetsClosure cl(&_old_set);
     heap_region_iterate(&cl);

-    // Need to do this after the heap iteration to be able to
-    // recognize the young regions and ignore them during the iteration.
-    _young_list->empty_list();
+    // Note that emptying the _young_list is postponed and instead done as
+    // the first step when rebuilding the regions sets again. The reason for
+    // this is that during a full GC string deduplication needs to know if
+    // a collected region was young or old when the full GC was initiated.
   }
   _free_list.remove_all();
 }
@@ -6429,13 +6395,13 @@
 class RebuildRegionSetsClosure : public HeapRegionClosure {
 private:
   bool            _free_list_only;
-  OldRegionSet*   _old_set;
+  HeapRegionSet*   _old_set;
   FreeRegionList* _free_list;
   size_t          _total_used;

 public:
   RebuildRegionSetsClosure(bool free_list_only,
-                           OldRegionSet* old_set, FreeRegionList* free_list) :
+                           HeapRegionSet* old_set, FreeRegionList* free_list) :
     _free_list_only(free_list_only),
     _old_set(old_set), _free_list(free_list), _total_used(0) {
     assert(_free_list->is_empty(), "pre-condition");
@@ -6475,6 +6441,10 @@
 void G1CollectedHeap::rebuild_region_sets(bool free_list_only) {
   assert_at_safepoint(true /* should_be_vm_thread */);

+  if (!free_list_only) {
+    _young_list->empty_list();
+  }
+
   RebuildRegionSetsClosure cl(free_list_only, &_old_set, &_free_list);
   heap_region_iterate(&cl);

@@ -6510,6 +6480,7 @@
   bool young_list_full = g1_policy()->is_young_list_full();
   if (force || !young_list_full) {
     HeapRegion* new_alloc_region = new_region(word_size,
+                                              false /* is_old */,
                                               false /* do_expand */);
     if (new_alloc_region != NULL) {
       set_region_short_lived_locked(new_alloc_region);
@@ -6568,14 +6539,16 @@
   assert(FreeList_lock->owned_by_self(), "pre-condition");

   if (count < g1_policy()->max_regions(ap)) {
+    bool survivor = (ap == GCAllocForSurvived);
     HeapRegion* new_alloc_region = new_region(word_size,
+                                              !survivor,
                                               true /* do_expand */);
     if (new_alloc_region != NULL) {
       // We really only need to do this for old regions given that we
       // should never scan survivors. But it doesn't hurt to do it
       // for survivors too.
       new_alloc_region->set_saved_mark();
-      if (ap == GCAllocForSurvived) {
+      if (survivor) {
         new_alloc_region->set_survivor();
         _hr_printer.alloc(new_alloc_region, G1HRPrinter::Survivor);
       } else {
@@ -6632,23 +6605,22 @@

 class VerifyRegionListsClosure : public HeapRegionClosure {
 private:
-  FreeRegionList*     _free_list;
-  OldRegionSet*       _old_set;
-  HumongousRegionSet* _humongous_set;
-  uint                _region_count;
+  HeapRegionSet*   _old_set;
+  HeapRegionSet*   _humongous_set;
+  FreeRegionList*  _free_list;

 public:
-  VerifyRegionListsClosure(OldRegionSet* old_set,
-                           HumongousRegionSet* humongous_set,
+  HeapRegionSetCount _old_count;
+  HeapRegionSetCount _humongous_count;
+  HeapRegionSetCount _free_count;
+
+  VerifyRegionListsClosure(HeapRegionSet* old_set,
+                           HeapRegionSet* humongous_set,
                            FreeRegionList* free_list) :
-    _old_set(old_set), _humongous_set(humongous_set),
-    _free_list(free_list), _region_count(0) { }
-
-  uint region_count() { return _region_count; }
+    _old_set(old_set), _humongous_set(humongous_set), _free_list(free_list),
+    _old_count(), _humongous_count(), _free_count(){ }

   bool doHeapRegion(HeapRegion* hr) {
-    _region_count += 1;
-
     if (hr->continuesHumongous()) {
       return false;
     }
@@ -6656,14 +6628,31 @@
     if (hr->is_young()) {
       // TODO
     } else if (hr->startsHumongous()) {
-      _humongous_set->verify_next_region(hr);
+      assert(hr->containing_set() == _humongous_set, err_msg("Heap region %u is starts humongous but not in humongous set.", hr->region_num()));
+      _humongous_count.increment(1u, hr->capacity());
     } else if (hr->is_empty()) {
-      _free_list->verify_next_region(hr);
+      assert(hr->containing_set() == _free_list, err_msg("Heap region %u is empty but not on the free list.", hr->region_num()));
+      _free_count.increment(1u, hr->capacity());
     } else {
-      _old_set->verify_next_region(hr);
+      assert(hr->containing_set() == _old_set, err_msg("Heap region %u is old but not in the old set.", hr->region_num()));
+      _old_count.increment(1u, hr->capacity());
     }
     return false;
   }
+
+  void verify_counts(HeapRegionSet* old_set, HeapRegionSet* humongous_set, FreeRegionList* free_list) {
+    guarantee(old_set->length() == _old_count.length(), err_msg("Old set count mismatch. Expected %u, actual %u.", old_set->length(), _old_count.length()));
+    guarantee(old_set->total_capacity_bytes() == _old_count.capacity(), err_msg("Old set capacity mismatch. Expected " SIZE_FORMAT ", actual " SIZE_FORMAT,
+        old_set->total_capacity_bytes(), _old_count.capacity()));
+
+    guarantee(humongous_set->length() == _humongous_count.length(), err_msg("Hum set count mismatch. Expected %u, actual %u.", humongous_set->length(), _humongous_count.length()));
+    guarantee(humongous_set->total_capacity_bytes() == _humongous_count.capacity(), err_msg("Hum set capacity mismatch. Expected " SIZE_FORMAT ", actual " SIZE_FORMAT,
+        humongous_set->total_capacity_bytes(), _humongous_count.capacity()));
+
+    guarantee(free_list->length() == _free_count.length(), err_msg("Free list count mismatch. Expected %u, actual %u.", free_list->length(), _free_count.length()));
+    guarantee(free_list->total_capacity_bytes() == _free_count.capacity(), err_msg("Free list capacity mismatch. Expected " SIZE_FORMAT ", actual " SIZE_FORMAT,
+        free_list->total_capacity_bytes(), _free_count.capacity()));
+  }
 };

 HeapRegion* G1CollectedHeap::new_heap_region(uint hrs_index,
@@ -6679,16 +6668,14 @@
   assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);

   // First, check the explicit lists.
-  _free_list.verify();
+  _free_list.verify_list();
   {
     // Given that a concurrent operation might be adding regions to
     // the secondary free list we have to take the lock before
     // verifying it.
     MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
-    _secondary_free_list.verify();
-  }
-  _old_set.verify();
-  _humongous_set.verify();
+    _secondary_free_list.verify_list();
+  }

   // If a concurrent region freeing operation is in progress it will
   // be difficult to correctly attributed any free regions we come
@@ -6711,16 +6698,10 @@

   // Finally, make sure that the region accounting in the lists is
   // consistent with what we see in the heap.
-  _old_set.verify_start();
-  _humongous_set.verify_start();
-  _free_list.verify_start();

   VerifyRegionListsClosure cl(&_old_set, &_humongous_set, &_free_list);
   heap_region_iterate(&cl);
-
-  _old_set.verify_end();
-  _humongous_set.verify_end();
-  _free_list.verify_end();
+  cl.verify_counts(&_old_set, &_humongous_set, &_free_list);
 }

 // Optimized nmethod scanning
@@ -6821,6 +6802,13 @@
   g1_policy()->phase_times()->record_strong_code_root_migration_time(migration_time_ms);
 }

+void G1CollectedHeap::purge_code_root_memory() {
+  double purge_start = os::elapsedTime();
+  G1CodeRootSet::purge_chunks(G1CodeRootsChunkCacheKeepPercent);
+  double purge_time_ms = (os::elapsedTime() - purge_start) * 1000.0;
+  g1_policy()->phase_times()->record_strong_code_root_purge_time(purge_time_ms);
+}
+
 // Mark all the code roots that point into regions *not* in the
 // collection set.
 //
@@ -6891,7 +6879,7 @@
       // Code roots should never be attached to a continuation of a humongous region
       assert(hrrs->strong_code_roots_list_length() == 0,
              err_msg("code roots should never be attached to continuations of humongous region "HR_FORMAT
-                     " starting at "HR_FORMAT", but has "INT32_FORMAT,
+                     " starting at "HR_FORMAT", but has "SIZE_FORMAT,
                      HR_FORMAT_PARAMS(hr), HR_FORMAT_PARAMS(hr->humongous_start_region()),
                      hrrs->strong_code_roots_list_length()));
       return false;
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@
 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc_implementation/g1/g1YCTypes.hpp"
 #include "gc_implementation/g1/heapRegionSeq.hpp"
-#include "gc_implementation/g1/heapRegionSets.hpp"
+#include "gc_implementation/g1/heapRegionSet.hpp"
 #include "gc_implementation/shared/hSpaceCounters.hpp"
 #include "gc_implementation/shared/parGCAllocBuffer.hpp"
 #include "memory/barrierSet.hpp"
@@ -243,18 +243,18 @@
   MemRegion _g1_committed;

   // The master free list. It will satisfy all new region allocations.
-  MasterFreeRegionList      _free_list;
+  FreeRegionList _free_list;

   // The secondary free list which contains regions that have been
   // freed up during the cleanup process. This will be appended to the
   // master free list when appropriate.
-  SecondaryFreeRegionList   _secondary_free_list;
+  FreeRegionList _secondary_free_list;

   // It keeps track of the old regions.
-  MasterOldRegionSet        _old_set;
+  HeapRegionSet _old_set;

   // It keeps track of the humongous regions.
-  MasterHumongousRegionSet  _humongous_set;
+  HeapRegionSet _humongous_set;

   // The number of regions we could create by expansion.
   uint _expansion_regions;
@@ -497,13 +497,14 @@
   // check whether there's anything available on the
   // secondary_free_list and/or wait for more regions to appear on
   // that list, if _free_regions_coming is set.
-  HeapRegion* new_region_try_secondary_free_list();
+  HeapRegion* new_region_try_secondary_free_list(bool is_old);

   // Try to allocate a single non-humongous HeapRegion sufficient for
   // an allocation of the given word_size. If do_expand is true,
   // attempt to expand the heap if necessary to satisfy the allocation
-  // request.
-  HeapRegion* new_region(size_t word_size, bool do_expand);
+  // request. If the region is to be used as an old region or for a
+  // humongous object, set is_old to true. If not, to false.
+  HeapRegion* new_region(size_t word_size, bool is_old, bool do_expand);

   // Attempt to satisfy a humongous allocation request of the given
   // size by finding a contiguous set of free regions of num_regions
@@ -757,6 +758,29 @@

   G1HRPrinter* hr_printer() { return &_hr_printer; }

+  // Frees a non-humongous region by initializing its contents and
+  // adding it to the free list that's passed as a parameter (this is
+  // usually a local list which will be appended to the master free
+  // list later). The used bytes of freed regions are accumulated in
+  // pre_used. If par is true, the region's RSet will not be freed
+  // up. The assumption is that this will be done later.
+  // The locked parameter indicates if the caller has already taken
+  // care of proper synchronization. This may allow some optimizations.
+  void free_region(HeapRegion* hr,
+                   FreeRegionList* free_list,
+                   bool par,
+                   bool locked = false);
+
+  // Frees a humongous region by collapsing it into individual regions
+  // and calling free_region() for each of them. The freed regions
+  // will be added to the free list that's passed as a parameter (this
+  // is usually a local list which will be appended to the master free
+  // list later). The used bytes of freed regions are accumulated in
+  // pre_used. If par is true, the region's RSet will not be freed
+  // up. The assumption is that this will be done later.
+  void free_humongous_region(HeapRegion* hr,
+                             FreeRegionList* free_list,
+                             bool par);
 protected:

   // Shrink the garbage-first heap by at most the given size (in bytes!).
@@ -840,30 +864,6 @@
   // string table, and referents of reachable weak refs.
   void g1_process_weak_roots(OopClosure* root_closure);

-  // Frees a non-humongous region by initializing its contents and
-  // adding it to the free list that's passed as a parameter (this is
-  // usually a local list which will be appended to the master free
-  // list later). The used bytes of freed regions are accumulated in
-  // pre_used. If par is true, the region's RSet will not be freed
-  // up. The assumption is that this will be done later.
-  void free_region(HeapRegion* hr,
-                   size_t* pre_used,
-                   FreeRegionList* free_list,
-                   bool par);
-
-  // Frees a humongous region by collapsing it into individual regions
-  // and calling free_region() for each of them. The freed regions
-  // will be added to the free list that's passed as a parameter (this
-  // is usually a local list which will be appended to the master free
-  // list later). The used bytes of freed regions are accumulated in
-  // pre_used. If par is true, the region's RSet will not be freed
-  // up. The assumption is that this will be done later.
-  void free_humongous_region(HeapRegion* hr,
-                             size_t* pre_used,
-                             FreeRegionList* free_list,
-                             HumongousRegionSet* humongous_proxy_set,
-                             bool par);
-
   // Notifies all the necessary spaces that the committed space has
   // been updated (either expanded or shrunk). It should be called
   // after _g1_storage is updated.
@@ -1242,21 +1242,17 @@
   bool is_on_master_free_list(HeapRegion* hr) {
     return hr->containing_set() == &_free_list;
   }
-
-  bool is_in_humongous_set(HeapRegion* hr) {
-    return hr->containing_set() == &_humongous_set;
-  }
 #endif // ASSERT

   // Wrapper for the region list operations that can be called from
   // methods outside this class.

-  void secondary_free_list_add_as_tail(FreeRegionList* list) {
-    _secondary_free_list.add_as_tail(list);
+  void secondary_free_list_add(FreeRegionList* list) {
+    _secondary_free_list.add_ordered(list);
   }

   void append_secondary_free_list() {
-    _free_list.add_as_head(&_secondary_free_list);
+    _free_list.add_ordered(&_secondary_free_list);
   }

   void append_secondary_free_list_if_not_empty_with_lock() {
@@ -1298,27 +1294,9 @@
   // True iff an evacuation has failed in the most-recent collection.
   bool evacuation_failed() { return _evacuation_failed; }

-  // It will free a region if it has allocated objects in it that are
-  // all dead. It calls either free_region() or
-  // free_humongous_region() depending on the type of the region that
-  // is passed to it.
-  void free_region_if_empty(HeapRegion* hr,
-                            size_t* pre_used,
-                            FreeRegionList* free_list,
-                            OldRegionSet* old_proxy_set,
-                            HumongousRegionSet* humongous_proxy_set,
-                            HRRSCleanupTask* hrrs_cleanup_task,
-                            bool par);
-
-  // It appends the free list to the master free list and updates the
-  // master humongous list according to the contents of the proxy
-  // list. It also adjusts the total used bytes according to pre_used
-  // (if par is true, it will do so by taking the ParGCRareEvent_lock).
-  void update_sets_after_freeing_regions(size_t pre_used,
-                                       FreeRegionList* free_list,
-                                       OldRegionSet* old_proxy_set,
-                                       HumongousRegionSet* humongous_proxy_set,
-                                       bool par);
+  void remove_from_old_sets(const HeapRegionSetCount& old_regions_removed, const HeapRegionSetCount& humongous_regions_removed);
+  void prepend_to_freelist(FreeRegionList* list);
+  void decrement_summary_bytes(size_t bytes);

   // Returns "TRUE" iff "p" points into the committed areas of the heap.
   virtual bool is_in(const void* p) const;
@@ -1481,9 +1459,11 @@
   // Section on thread-local allocation buffers (TLABs)
   // See CollectedHeap for semantics.

-  virtual bool supports_tlab_allocation() const;
-  virtual size_t tlab_capacity(Thread* thr) const;
-  virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;
+  bool supports_tlab_allocation() const;
+  size_t tlab_capacity(Thread* ignored) const;
+  size_t tlab_used(Thread* ignored) const;
+  size_t max_tlab_size() const;
+  size_t unsafe_max_tlab_alloc(Thread* ignored) const;

   // Can a compiler initialize a new object without store barriers?
   // This permission only extends from the creation of a new object
@@ -1568,7 +1548,7 @@
   void set_region_short_lived_locked(HeapRegion* hr);
   // add appropriate methods for any other surv rate groups

-  YoungList* young_list() { return _young_list; }
+  YoungList* young_list() const { return _young_list; }

   // debugging
   bool check_young_list_well_formed() {
@@ -1671,6 +1651,9 @@
   // that were not successfullly evacuated are not migrated.
   void migrate_strong_code_roots();

+  // Free up superfluous code root memory.
+  void purge_code_root_memory();
+
   // During an initial mark pause, mark all the code roots that
   // point into regions *not* in the collection set.
   void mark_strong_code_roots(uint worker_id);
@@ -1683,6 +1666,8 @@
   // in symbol table, possibly in parallel.
   void unlink_string_and_symbol_table(BoolObjectClosure* is_alive, bool unlink_strings = true, bool unlink_symbols = true);

+  // Redirty logged cards in the refinement queue.
+  void redirty_logged_cards();
   // Verification

   // The following is just to alert the verification code
@@ -1809,8 +1794,6 @@
   size_t           _undo_waste;

   OopsInHeapRegionClosure*      _evac_failure_cl;
-  G1ParScanHeapEvacClosure*     _evac_cl;
-  G1ParScanPartialArrayClosure* _partial_scan_cl;

   int  _hash_seed;
   uint _queue_num;
@@ -1938,14 +1921,6 @@
     return _evac_failure_cl;
   }

-  void set_evac_closure(G1ParScanHeapEvacClosure* evac_cl) {
-    _evac_cl = evac_cl;
-  }
-
-  void set_partial_scan_closure(G1ParScanPartialArrayClosure* partial_scan_cl) {
-    _partial_scan_cl = partial_scan_cl;
-  }
-
   int* hash_seed() { return &_hash_seed; }
   uint queue_num() { return _queue_num; }

@@ -1993,19 +1968,121 @@
                                                  false /* retain */);
     }
   }
+private:
+  #define G1_PARTIAL_ARRAY_MASK 0x2
+
+  inline bool has_partial_array_mask(oop* ref) const {
+    return ((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) == G1_PARTIAL_ARRAY_MASK;
+  }
+
+  // We never encode partial array oops as narrowOop*, so return false immediately.
+  // This allows the compiler to create optimized code when popping references from
+  // the work queue.
+  inline bool has_partial_array_mask(narrowOop* ref) const {
+    assert(((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) != G1_PARTIAL_ARRAY_MASK, "Partial array oop reference encoded as narrowOop*");
+    return false;
+  }
+
+  // Only implement set_partial_array_mask() for regular oops, not for narrowOops.
+  // We always encode partial arrays as regular oop, to allow the
+  // specialization for has_partial_array_mask() for narrowOops above.
+  // This means that unintentional use of this method with narrowOops are caught
+  // by the compiler.
+  inline oop* set_partial_array_mask(oop obj) const {
+    assert(((uintptr_t)(void *)obj & G1_PARTIAL_ARRAY_MASK) == 0, "Information loss!");
+    return (oop*) ((uintptr_t)(void *)obj | G1_PARTIAL_ARRAY_MASK);
+  }
+
+  inline oop clear_partial_array_mask(oop* ref) const {
+    return cast_to_oop((intptr_t)ref & ~G1_PARTIAL_ARRAY_MASK);
+  }
+
+  void do_oop_partial_array(oop* p) {
+    assert(has_partial_array_mask(p), "invariant");
+    oop from_obj = clear_partial_array_mask(p);
+
+    assert(Universe::heap()->is_in_reserved(from_obj), "must be in heap.");
+    assert(from_obj->is_objArray(), "must be obj array");
+    objArrayOop from_obj_array = objArrayOop(from_obj);
+    // The from-space object contains the real length.
+    int length                 = from_obj_array->length();
+
+    assert(from_obj->is_forwarded(), "must be forwarded");
+    oop to_obj                 = from_obj->forwardee();
+    assert(from_obj != to_obj, "should not be chunking self-forwarded objects");
+    objArrayOop to_obj_array   = objArrayOop(to_obj);
+    // We keep track of the next start index in the length field of the
+    // to-space object.
+    int next_index             = to_obj_array->length();
+    assert(0 <= next_index && next_index < length,
+           err_msg("invariant, next index: %d, length: %d", next_index, length));
+
+    int start                  = next_index;
+    int end                    = length;
+    int remainder              = end - start;
+    // We'll try not to push a range that's smaller than ParGCArrayScanChunk.
+    if (remainder > 2 * ParGCArrayScanChunk) {
+      end = start + ParGCArrayScanChunk;
+      to_obj_array->set_length(end);
+      // Push the remainder before we process the range in case another
+      // worker has run out of things to do and can steal it.
+      oop* from_obj_p = set_partial_array_mask(from_obj);
+      push_on_queue(from_obj_p);
+    } else {
+      assert(length == end, "sanity");
+      // We'll process the final range for this object. Restore the length
+      // so that the heap remains parsable in case of evacuation failure.
+      to_obj_array->set_length(end);
+    }
+    _scanner.set_region(_g1h->heap_region_containing_raw(to_obj));
+    // Process indexes [start,end). It will also process the header
+    // along with the first chunk (i.e., the chunk with start == 0).
+    // Note that at this point the length field of to_obj_array is not
+    // correct given that we are using it to keep track of the next
+    // start index. oop_iterate_range() (thankfully!) ignores the length
+    // field and only relies on the start / end parameters.  It does
+    // however return the size of the object which will be incorrect. So
+    // we have to ignore it even if we wanted to use it.
+    to_obj_array->oop_iterate_range(&_scanner, start, end);
+  }
+
+  // This method is applied to the fields of the objects that have just been copied.
+  template <class T> void do_oop_evac(T* p, HeapRegion* from) {
+    assert(!oopDesc::is_null(oopDesc::load_decode_heap_oop(p)),
+           "Reference should not be NULL here as such are never pushed to the task queue.");
+    oop obj = oopDesc::load_decode_heap_oop_not_null(p);
+
+    // Although we never intentionally push references outside of the collection
+    // set, due to (benign) races in the claim mechanism during RSet scanning more
+    // than one thread might claim the same card. So the same card may be
+    // processed multiple times. So redo this check.
+    if (_g1h->in_cset_fast_test(obj)) {
+      oop forwardee;
+      if (obj->is_forwarded()) {
+        forwardee = obj->forwardee();
+      } else {
+        forwardee = copy_to_survivor_space(obj);
+      }
+      assert(forwardee != NULL, "forwardee should not be NULL");
+      oopDesc::encode_store_heap_oop(p, forwardee);
+    }
+
+    assert(obj != NULL, "Must be");
+    update_rs(from, p, queue_num());
+  }
+public:

   oop copy_to_survivor_space(oop const obj);

   template <class T> void deal_with_reference(T* ref_to_scan) {
-    if (has_partial_array_mask(ref_to_scan)) {
-      _partial_scan_cl->do_oop_nv(ref_to_scan);
-    } else {
+    if (!has_partial_array_mask(ref_to_scan)) {
       // Note: we can use "raw" versions of "region_containing" because
       // "obj_to_scan" is definitely in the heap, and is not in a
       // humongous region.
       HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan);
-      _evac_cl->set_region(r);
-      _evac_cl->do_oop_nv(ref_to_scan);
+      do_oop_evac(ref_to_scan, r);
+    } else {
+      do_oop_partial_array((oop*)ref_to_scan);
     }
   }
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -30,6 +30,7 @@
 #include "gc_implementation/g1/g1AllocRegion.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc_implementation/g1/heapRegionSet.inline.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "utilities/taskqueue.hpp"
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -820,6 +820,8 @@
     // do that for any other surv rate groups
   }

+  size_t young_list_target_length() const { return _young_list_target_length; }
+
   bool is_young_list_full() {
     uint young_list_length = _g1->young_list()->length();
     uint young_list_target_length = _young_list_target_length;
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014 Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1GCPhaseTimes.hpp"
 #include "gc_implementation/g1/g1Log.hpp"
+#include "gc_implementation/g1/g1StringDedup.hpp"

 // Helper class for avoiding interleaved logging
 class LineBuffer: public StackObj {
@@ -168,7 +169,9 @@
   _last_termination_attempts(_max_gc_threads, SIZE_FORMAT),
   _last_gc_worker_end_times_ms(_max_gc_threads, "%.1lf", false),
   _last_gc_worker_times_ms(_max_gc_threads, "%.1lf"),
-  _last_gc_worker_other_times_ms(_max_gc_threads, "%.1lf")
+  _last_gc_worker_other_times_ms(_max_gc_threads, "%.1lf"),
+  _cur_string_dedup_queue_fixup_worker_times_ms(_max_gc_threads, "%.1lf"),
+  _cur_string_dedup_table_fixup_worker_times_ms(_max_gc_threads, "%.1lf")
 {
   assert(max_gc_threads > 0, "Must have some GC threads");
 }
@@ -229,6 +232,16 @@
   _last_gc_worker_other_times_ms.verify();
 }

+void G1GCPhaseTimes::note_string_dedup_fixup_start() {
+  _cur_string_dedup_queue_fixup_worker_times_ms.reset();
+  _cur_string_dedup_table_fixup_worker_times_ms.reset();
+}
+
+void G1GCPhaseTimes::note_string_dedup_fixup_end() {
+  _cur_string_dedup_queue_fixup_worker_times_ms.verify();
+  _cur_string_dedup_table_fixup_worker_times_ms.verify();
+}
+
 void G1GCPhaseTimes::print_stats(int level, const char* str, double value) {
   LineBuffer(level).append_and_print_cr("[%s: %.1lf ms]", str, value);
 }
@@ -250,6 +263,14 @@
     // Strong code root migration time
     misc_time_ms += _cur_strong_code_root_migration_time_ms;

+    // Strong code root purge time
+    misc_time_ms += _cur_strong_code_root_purge_time_ms;
+
+    if (G1StringDedup::is_enabled()) {
+      // String dedup fixup time
+      misc_time_ms += _cur_string_dedup_fixup_time_ms;
+    }
+
     // Subtract the time taken to clean the card table from the
     // current value of "other time"
     misc_time_ms += _cur_clear_ct_time_ms;
@@ -299,20 +320,43 @@
   }
   print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
   print_stats(1, "Code Root Migration", _cur_strong_code_root_migration_time_ms);
+  print_stats(1, "Code Root Purge", _cur_strong_code_root_purge_time_ms);
+  if (G1StringDedup::is_enabled()) {
+    print_stats(1, "String Dedup Fixup", _cur_string_dedup_fixup_time_ms, _active_gc_threads);
+    _cur_string_dedup_queue_fixup_worker_times_ms.print(2, "Queue Fixup (ms)");
+    _cur_string_dedup_table_fixup_worker_times_ms.print(2, "Table Fixup (ms)");
+  }
   print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
   double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms();
   print_stats(1, "Other", misc_time_ms);
   if (_cur_verify_before_time_ms > 0.0) {
     print_stats(2, "Verify Before", _cur_verify_before_time_ms);
   }
+  if (G1CollectedHeap::heap()->evacuation_failed()) {
+    double evac_fail_handling = _cur_evac_fail_recalc_used + _cur_evac_fail_remove_self_forwards +
+      _cur_evac_fail_restore_remsets;
+    print_stats(2, "Evacuation Failure", evac_fail_handling);
+    if (G1Log::finest()) {
+      print_stats(3, "Recalculate Used", _cur_evac_fail_recalc_used);
+      print_stats(3, "Remove Self Forwards", _cur_evac_fail_remove_self_forwards);
+      print_stats(3, "Restore RemSet", _cur_evac_fail_restore_remsets);
+    }
+  }
   print_stats(2, "Choose CSet",
     (_recorded_young_cset_choice_time_ms +
     _recorded_non_young_cset_choice_time_ms));
   print_stats(2, "Ref Proc", _cur_ref_proc_time_ms);
   print_stats(2, "Ref Enq", _cur_ref_enq_time_ms);
+  if (G1DeferredRSUpdate) {
+    print_stats(2, "Redirty Cards", _recorded_redirty_logged_cards_time_ms);
+  }
   print_stats(2, "Free CSet",
     (_recorded_young_free_cset_time_ms +
     _recorded_non_young_free_cset_time_ms));
+  if (G1Log::finest()) {
+    print_stats(3, "Young Free CSet", _recorded_young_free_cset_time_ms);
+    print_stats(3, "Non-Young Free CSet", _recorded_non_young_free_cset_time_ms);
+  }
   if (_cur_verify_after_time_ms > 0.0) {
     print_stats(2, "Verify After", _cur_verify_after_time_ms);
   }
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014 Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -131,6 +131,15 @@
   double _cur_collection_par_time_ms;
   double _cur_collection_code_root_fixup_time_ms;
   double _cur_strong_code_root_migration_time_ms;
+  double _cur_strong_code_root_purge_time_ms;
+
+  double _cur_evac_fail_recalc_used;
+  double _cur_evac_fail_restore_remsets;
+  double _cur_evac_fail_remove_self_forwards;
+
+  double                  _cur_string_dedup_fixup_time_ms;
+  WorkerDataArray<double> _cur_string_dedup_queue_fixup_worker_times_ms;
+  WorkerDataArray<double> _cur_string_dedup_table_fixup_worker_times_ms;

   double _cur_clear_ct_time_ms;
   double _cur_ref_proc_time_ms;
@@ -142,6 +151,8 @@
   double _recorded_young_cset_choice_time_ms;
   double _recorded_non_young_cset_choice_time_ms;

+  double _recorded_redirty_logged_cards_time_ms;
+
   double _recorded_young_free_cset_time_ms;
   double _recorded_non_young_free_cset_time_ms;

@@ -223,6 +234,37 @@
     _cur_strong_code_root_migration_time_ms = ms;
   }

+  void record_strong_code_root_purge_time(double ms) {
+    _cur_strong_code_root_purge_time_ms = ms;
+  }
+
+  void record_evac_fail_recalc_used_time(double ms) {
+    _cur_evac_fail_recalc_used = ms;
+  }
+
+  void record_evac_fail_restore_remsets(double ms) {
+    _cur_evac_fail_restore_remsets = ms;
+  }
+
+  void record_evac_fail_remove_self_forwards(double ms) {
+    _cur_evac_fail_remove_self_forwards = ms;
+  }
+
+  void note_string_dedup_fixup_start();
+  void note_string_dedup_fixup_end();
+
+  void record_string_dedup_fixup_time(double ms) {
+    _cur_string_dedup_fixup_time_ms = ms;
+  }
+
+  void record_string_dedup_queue_fixup_worker_time(uint worker_id, double ms) {
+    _cur_string_dedup_queue_fixup_worker_times_ms.set(worker_id, ms);
+  }
+
+  void record_string_dedup_table_fixup_worker_time(uint worker_id, double ms) {
+    _cur_string_dedup_table_fixup_worker_times_ms.set(worker_id, ms);
+  }
+
   void record_ref_proc_time(double ms) {
     _cur_ref_proc_time_ms = ms;
   }
@@ -251,6 +293,10 @@
     _recorded_non_young_cset_choice_time_ms = time_ms;
   }

+  void record_redirty_logged_cards_time_ms(double time_ms) {
+    _recorded_redirty_logged_cards_time_ms = time_ms;
+  }
+
   void record_cur_collection_start_sec(double time_ms) {
     _cur_collection_start_sec = time_ms;
   }
--- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,7 @@
 #include "code/icBuffer.hpp"
 #include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/g1MarkSweep.hpp"
+#include "gc_implementation/g1/g1StringDedup.hpp"
 #include "gc_implementation/shared/gcHeapSummary.hpp"
 #include "gc_implementation/shared/gcTimer.hpp"
 #include "gc_implementation/shared/gcTrace.hpp"
@@ -196,17 +197,19 @@
   G1CollectedHeap* _g1h;
   ModRefBarrierSet* _mrbs;
   CompactPoint _cp;
-  HumongousRegionSet _humongous_proxy_set;
+  HeapRegionSetCount _humongous_regions_removed;

   void free_humongous_region(HeapRegion* hr) {
     HeapWord* end = hr->end();
-    size_t dummy_pre_used;
     FreeRegionList dummy_free_list("Dummy Free List for G1MarkSweep");

     assert(hr->startsHumongous(),
            "Only the start of a humongous region should be freed.");
-    _g1h->free_humongous_region(hr, &dummy_pre_used, &dummy_free_list,
-                                &_humongous_proxy_set, false /* par */);
+
+    hr->set_containing_set(NULL);
+    _humongous_regions_removed.increment(1u, hr->capacity());
+
+    _g1h->free_humongous_region(hr, &dummy_free_list, false /* par */);
     hr->prepare_for_compaction(&_cp);
     // Also clear the part of the card table that will be unused after
     // compaction.
@@ -219,16 +222,13 @@
   : _g1h(G1CollectedHeap::heap()),
     _mrbs(_g1h->g1_barrier_set()),
     _cp(NULL, cs, cs->initialize_threshold()),
-    _humongous_proxy_set("G1MarkSweep Humongous Proxy Set") { }
+    _humongous_regions_removed() { }

   void update_sets() {
     // We'll recalculate total used bytes and recreate the free list
     // at the end of the GC, so no point in updating those values here.
-    _g1h->update_sets_after_freeing_regions(0, /* pre_used */
-                                            NULL, /* free_list */
-                                            NULL, /* old_proxy_set */
-                                            &_humongous_proxy_set,
-                                            false /* par */);
+    HeapRegionSetCount empty_set;
+    _g1h->remove_from_old_sets(empty_set, _humongous_regions_removed);
   }

   bool doHeapRegion(HeapRegion* hr) {
@@ -321,6 +321,10 @@
   // have been cleared if they pointed to non-surviving objects.)
   g1h->g1_process_weak_roots(&GenMarkSweep::adjust_pointer_closure);

+  if (G1StringDedup::is_enabled()) {
+    G1StringDedup::oops_do(&GenMarkSweep::adjust_pointer_closure);
+  }
+
   GenMarkSweep::adjust_marks();

   G1AdjustPointersClosure blk;
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -80,53 +80,6 @@
   virtual void do_oop(narrowOop* p)    { do_oop_nv(p); }
 };

-#define G1_PARTIAL_ARRAY_MASK 0x2
-
-inline bool has_partial_array_mask(oop* ref) {
-  return ((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) == G1_PARTIAL_ARRAY_MASK;
-}
-
-// We never encode partial array oops as narrowOop*, so return false immediately.
-// This allows the compiler to create optimized code when popping references from
-// the work queue.
-inline bool has_partial_array_mask(narrowOop* ref) {
-  assert(((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) != G1_PARTIAL_ARRAY_MASK, "Partial array oop reference encoded as narrowOop*");
-  return false;
-}
-
-// Only implement set_partial_array_mask() for regular oops, not for narrowOops.
-// We always encode partial arrays as regular oop, to allow the
-// specialization for has_partial_array_mask() for narrowOops above.
-// This means that unintentional use of this method with narrowOops are caught
-// by the compiler.
-inline oop* set_partial_array_mask(oop obj) {
-  assert(((uintptr_t)(void *)obj & G1_PARTIAL_ARRAY_MASK) == 0, "Information loss!");
-  return (oop*) ((uintptr_t)(void *)obj | G1_PARTIAL_ARRAY_MASK);
-}
-
-template <class T> inline oop clear_partial_array_mask(T* ref) {
-  return cast_to_oop((intptr_t)ref & ~G1_PARTIAL_ARRAY_MASK);
-}
-
-class G1ParScanPartialArrayClosure : public G1ParClosureSuper {
-  G1ParScanClosure _scanner;
-
-public:
-  G1ParScanPartialArrayClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state, ReferenceProcessor* rp) :
-    G1ParClosureSuper(g1, par_scan_state), _scanner(g1, par_scan_state, rp)
-  {
-    assert(_ref_processor == NULL, "sanity");
-  }
-
-  G1ParScanClosure* scanner() {
-    return &_scanner;
-  }
-
-  template <class T> void do_oop_nv(T* p);
-  virtual void do_oop(oop* p)       { do_oop_nv(p); }
-  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
-};
-
 // Add back base class for metadata
 class G1ParCopyHelper : public G1ParClosureSuper {
 protected:
@@ -173,15 +126,8 @@
 typedef G1ParCopyClosure<G1BarrierNone, true> G1ParScanAndMarkExtRootClosure;
 typedef G1ParCopyClosure<G1BarrierKlass, true> G1ParScanAndMarkMetadataClosure;

-// The following closure type is defined in g1_specialized_oop_closures.hpp:
-//
-// typedef G1ParCopyClosure<G1BarrierEvac, false> G1ParScanHeapEvacClosure;
-
 // We use a separate closure to handle references during evacuation
 // failure processing.
-// We could have used another instance of G1ParScanHeapEvacClosure
-// (since that closure no longer assumes that the references it
-// handles point into the collection set).

 typedef G1ParCopyClosure<G1BarrierEvac, false> G1ParScanHeapEvacFailureClosure;
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -463,8 +463,9 @@
   int into_cset_n_buffers = into_cset_dcqs.completed_buffers_num();

   if (_g1->evacuation_failed()) {
+    double restore_remembered_set_start = os::elapsedTime();
+
     // Restore remembered sets for the regions pointing into the collection set.
-
     if (G1DeferredRSUpdate) {
       // If deferred RS updates are enabled then we just need to transfer
       // the completed buffers from (a) the DirtyCardQueueSet used to hold
@@ -483,6 +484,8 @@
       }
       assert(n_completed_buffers == into_cset_n_buffers, "missed some buffers");
     }
+
+    _g1->g1_policy()->phase_times()->record_evac_fail_restore_remsets((os::elapsedTime() - restore_remembered_set_start) * 1000.0);
   }

   // Free any completed buffers in the DirtyCardQueueSet used to hold cards
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1StringDedup.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "classfile/javaClasses.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1GCPhaseTimes.hpp"
+#include "gc_implementation/g1/g1StringDedup.hpp"
+#include "gc_implementation/g1/g1StringDedupQueue.hpp"
+#include "gc_implementation/g1/g1StringDedupStat.hpp"
+#include "gc_implementation/g1/g1StringDedupTable.hpp"
+#include "gc_implementation/g1/g1StringDedupThread.hpp"
+
+bool G1StringDedup::_enabled = false;
+
+void G1StringDedup::initialize() {
+  assert(UseG1GC, "String deduplication only available with G1");
+  if (UseStringDeduplication) {
+    _enabled = true;
+    G1StringDedupQueue::create();
+    G1StringDedupTable::create();
+    G1StringDedupThread::create();
+  }
+}
+
+bool G1StringDedup::is_candidate_from_mark(oop obj) {
+  if (java_lang_String::is_instance(obj)) {
+    bool from_young = G1CollectedHeap::heap()->heap_region_containing_raw(obj)->is_young();
+    if (from_young && obj->age() < StringDeduplicationAgeThreshold) {
+      // Candidate found. String is being evacuated from young to old but has not
+      // reached the deduplication age threshold, i.e. has not previously been a
+      // candidate during its life in the young generation.
+      return true;
+    }
+  }
+
+  // Not a candidate
+  return false;
+}
+
+void G1StringDedup::enqueue_from_mark(oop java_string) {
+  assert(is_enabled(), "String deduplication not enabled");
+  if (is_candidate_from_mark(java_string)) {
+    G1StringDedupQueue::push(0 /* worker_id */, java_string);
+  }
+}
+
+bool G1StringDedup::is_candidate_from_evacuation(bool from_young, bool to_young, oop obj) {
+  if (from_young && java_lang_String::is_instance(obj)) {
+    if (to_young && obj->age() == StringDeduplicationAgeThreshold) {
+      // Candidate found. String is being evacuated from young to young and just
+      // reached the deduplication age threshold.
+      return true;
+    }
+    if (!to_young && obj->age() < StringDeduplicationAgeThreshold) {
+      // Candidate found. String is being evacuated from young to old but has not
+      // reached the deduplication age threshold, i.e. has not previously been a
+      // candidate during its life in the young generation.
+      return true;
+    }
+  }
+
+  // Not a candidate
+  return false;
+}
+
+void G1StringDedup::enqueue_from_evacuation(bool from_young, bool to_young, uint worker_id, oop java_string) {
+  assert(is_enabled(), "String deduplication not enabled");
+  if (is_candidate_from_evacuation(from_young, to_young, java_string)) {
+    G1StringDedupQueue::push(worker_id, java_string);
+  }
+}
+
+void G1StringDedup::deduplicate(oop java_string) {
+  assert(is_enabled(), "String deduplication not enabled");
+  G1StringDedupStat dummy; // Statistics from this path is never used
+  G1StringDedupTable::deduplicate(java_string, dummy);
+}
+
+void G1StringDedup::oops_do(OopClosure* keep_alive) {
+  assert(is_enabled(), "String deduplication not enabled");
+  unlink_or_oops_do(NULL, keep_alive);
+}
+
+void G1StringDedup::unlink(BoolObjectClosure* is_alive) {
+  assert(is_enabled(), "String deduplication not enabled");
+  // Don't allow a potential resize or rehash during unlink, as the unlink
+  // operation itself might remove enough entries to invalidate such a decision.
+  unlink_or_oops_do(is_alive, NULL, false /* allow_resize_and_rehash */);
+}
+
+//
+// Task for parallel unlink_or_oops_do() operation on the deduplication queue
+// and table.
+//
+class G1StringDedupUnlinkOrOopsDoTask : public AbstractGangTask {
+private:
+  G1StringDedupUnlinkOrOopsDoClosure _cl;
+
+public:
+  G1StringDedupUnlinkOrOopsDoTask(BoolObjectClosure* is_alive,
+                                  OopClosure* keep_alive,
+                                  bool allow_resize_and_rehash) :
+    AbstractGangTask("G1StringDedupUnlinkOrOopsDoTask"),
+    _cl(is_alive, keep_alive, allow_resize_and_rehash) {
+  }
+
+  virtual void work(uint worker_id) {
+    double queue_fixup_start = os::elapsedTime();
+    G1StringDedupQueue::unlink_or_oops_do(&_cl);
+
+    double table_fixup_start = os::elapsedTime();
+    G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id);
+
+    double queue_fixup_time_ms = (table_fixup_start - queue_fixup_start) * 1000.0;
+    double table_fixup_time_ms = (os::elapsedTime() - table_fixup_start) * 1000.0;
+    G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
+    g1p->phase_times()->record_string_dedup_queue_fixup_worker_time(worker_id, queue_fixup_time_ms);
+    g1p->phase_times()->record_string_dedup_table_fixup_worker_time(worker_id, table_fixup_time_ms);
+  }
+};
+
+void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, bool allow_resize_and_rehash) {
+  assert(is_enabled(), "String deduplication not enabled");
+  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
+  g1p->phase_times()->note_string_dedup_fixup_start();
+  double fixup_start = os::elapsedTime();
+
+  G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash);
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+    g1h->set_par_threads();
+    g1h->workers()->run_task(&task);
+    g1h->set_par_threads(0);
+  } else {
+    task.work(0);
+  }
+
+  double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0;
+  g1p->phase_times()->record_string_dedup_fixup_time(fixup_time_ms);
+  g1p->phase_times()->note_string_dedup_fixup_end();
+}
+
+void G1StringDedup::threads_do(ThreadClosure* tc) {
+  assert(is_enabled(), "String deduplication not enabled");
+  tc->do_thread(G1StringDedupThread::thread());
+}
+
+void G1StringDedup::print_worker_threads_on(outputStream* st) {
+  assert(is_enabled(), "String deduplication not enabled");
+  G1StringDedupThread::thread()->print_on(st);
+  st->cr();
+}
+
+void G1StringDedup::verify() {
+  assert(is_enabled(), "String deduplication not enabled");
+  G1StringDedupQueue::verify();
+  G1StringDedupTable::verify();
+}
+
+G1StringDedupUnlinkOrOopsDoClosure::G1StringDedupUnlinkOrOopsDoClosure(BoolObjectClosure* is_alive,
+                                                                       OopClosure* keep_alive,
+                                                                       bool allow_resize_and_rehash) :
+  _is_alive(is_alive),
+  _keep_alive(keep_alive),
+  _resized_table(NULL),
+  _rehashed_table(NULL),
+  _next_queue(0),
+  _next_bucket(0) {
+  if (allow_resize_and_rehash) {
+    // If both resize and rehash is needed, only do resize. Rehash of
+    // the table will eventually happen if the situation persists.
+    _resized_table = G1StringDedupTable::prepare_resize();
+    if (!is_resizing()) {
+      _rehashed_table = G1StringDedupTable::prepare_rehash();
+    }
+  }
+}
+
+G1StringDedupUnlinkOrOopsDoClosure::~G1StringDedupUnlinkOrOopsDoClosure() {
+  assert(!is_resizing() || !is_rehashing(), "Can not both resize and rehash");
+  if (is_resizing()) {
+    G1StringDedupTable::finish_resize(_resized_table);
+  } else if (is_rehashing()) {
+    G1StringDedupTable::finish_rehash(_rehashed_table);
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1StringDedup.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUP_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUP_HPP
+
+//
+// String Deduplication
+//
+// String deduplication aims to reduce the heap live-set by deduplicating identical
+// instances of String so that they share the same backing character array.
+//
+// The deduplication process is divided in two main parts, 1) finding the objects to
+// deduplicate, and 2) deduplicating those objects. The first part is done as part of
+// a normal GC cycle when objects are marked or evacuated. At this time a check is
+// applied on each object to check if it is a candidate for deduplication. If so, the
+// object is placed on the deduplication queue for later processing. The second part,
+// processing the objects on the deduplication queue, is a concurrent phase which
+// starts right after the stop-the-wold marking/evacuation phase. This phase is
+// executed by the deduplication thread, which pulls deduplication candidates of the
+// deduplication queue and tries to deduplicate them.
+//
+// A deduplication hashtable is used to keep track of all unique character arrays
+// used by String objects. When deduplicating, a lookup is made in this table to see
+// if there is already an identical character array somewhere on the heap. If so, the
+// String object is adjusted to point to that character array, releasing the reference
+// to the original array allowing it to eventually be garbage collected. If the lookup
+// fails the character array is instead inserted into the hashtable so that this array
+// can be shared at some point in the future.
+//
+// Candidate selection
+//
+// An object is considered a deduplication candidate if all of the following
+// statements are true:
+//
+// - The object is an instance of java.lang.String
+//
+// - The object is being evacuated from a young heap region
+//
+// - The object is being evacuated to a young/survivor heap region and the
+//   object's age is equal to the deduplication age threshold
+//
+//   or
+//
+//   The object is being evacuated to an old heap region and the object's age is
+//   less than the deduplication age threshold
+//
+// Once an string object has been promoted to an old region, or its age is higher
+// than the deduplication age threshold, is will never become a candidate again.
+// This approach avoids making the same object a candidate more than once.
+//
+// Interned strings are a bit special. They are explicitly deduplicated just before
+// being inserted into the StringTable (to avoid counteracting C2 optimizations done
+// on string literals), then they also become deduplication candidates if they reach
+// the deduplication age threshold or are evacuated to an old heap region. The second
+// attempt to deduplicate such strings will be in vain, but we have no fast way of
+// filtering them out. This has not shown to be a problem, as the number of interned
+// strings is usually dwarfed by the number of normal (non-interned) strings.
+//
+// For additional information on string deduplication, please see JEP 192,
+// http://openjdk.java.net/jeps/192
+//
+
+#include "memory/allocation.hpp"
+#include "oops/oop.hpp"
+
+class OopClosure;
+class BoolObjectClosure;
+class ThreadClosure;
+class outputStream;
+class G1StringDedupTable;
+
+//
+// Main interface for interacting with string deduplication.
+//
+class G1StringDedup : public AllStatic {
+private:
+  // Single state for checking if both G1 and string deduplication is enabled.
+  static bool _enabled;
+
+  // Candidate selection policies, returns true if the given object is
+  // candidate for string deduplication.
+  static bool is_candidate_from_mark(oop obj);
+  static bool is_candidate_from_evacuation(bool from_young, bool to_young, oop obj);
+
+public:
+  // Returns true if both G1 and string deduplication is enabled.
+  static bool is_enabled() {
+    return _enabled;
+  }
+
+  static void initialize();
+
+  // Immediately deduplicates the given String object, bypassing the
+  // the deduplication queue.
+  static void deduplicate(oop java_string);
+
+  // Enqueues a deduplication candidate for later processing by the deduplication
+  // thread. Before enqueuing, these functions apply the appropriate candidate
+  // selection policy to filters out non-candidates.
+  static void enqueue_from_mark(oop java_string);
+  static void enqueue_from_evacuation(bool from_young, bool to_young,
+                                      unsigned int queue, oop java_string);
+
+  static void oops_do(OopClosure* keep_alive);
+  static void unlink(BoolObjectClosure* is_alive);
+  static void unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive,
+                                bool allow_resize_and_rehash = true);
+
+  static void threads_do(ThreadClosure* tc);
+  static void print_worker_threads_on(outputStream* st);
+  static void verify();
+};
+
+//
+// This closure encapsulates the state and the closures needed when scanning
+// the deduplication queue and table during the unlink_or_oops_do() operation.
+// A single instance of this closure is created and then shared by all worker
+// threads participating in the scan. The _next_queue and _next_bucket fields
+// provide a simple mechanism for GC workers to claim exclusive access to a
+// queue or a table partition.
+//
+class G1StringDedupUnlinkOrOopsDoClosure : public StackObj {
+private:
+  BoolObjectClosure*  _is_alive;
+  OopClosure*         _keep_alive;
+  G1StringDedupTable* _resized_table;
+  G1StringDedupTable* _rehashed_table;
+  size_t              _next_queue;
+  size_t              _next_bucket;
+
+public:
+  G1StringDedupUnlinkOrOopsDoClosure(BoolObjectClosure* is_alive,
+                                     OopClosure* keep_alive,
+                                     bool allow_resize_and_rehash);
+  ~G1StringDedupUnlinkOrOopsDoClosure();
+
+  bool is_resizing() {
+    return _resized_table != NULL;
+  }
+
+  G1StringDedupTable* resized_table() {
+    return _resized_table;
+  }
+
+  bool is_rehashing() {
+    return _rehashed_table != NULL;
+  }
+
+  // Atomically claims the next available queue for exclusive access by
+  // the current thread. Returns the queue number of the claimed queue.
+  size_t claim_queue() {
+    return (size_t)Atomic::add_ptr(1, &_next_queue) - 1;
+  }
+
+  // Atomically claims the next available table partition for exclusive
+  // access by the current thread. Returns the table bucket number where
+  // the claimed partition starts.
+  size_t claim_table_partition(size_t partition_size) {
+    return (size_t)Atomic::add_ptr(partition_size, &_next_bucket) - partition_size;
+  }
+
+  // Applies and returns the result from the is_alive closure, or
+  // returns true if no such closure was provided.
+  bool is_alive(oop o) {
+    if (_is_alive != NULL) {
+      return _is_alive->do_object_b(o);
+    }
+    return true;
+  }
+
+  // Applies the keep_alive closure, or does nothing if no such
+  // closure was provided.
+  void keep_alive(oop* p) {
+    if (_keep_alive != NULL) {
+      _keep_alive->do_oop(p);
+    }
+  }
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUP_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1StringDedupQueue.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "classfile/javaClasses.hpp"
+#include "gc_implementation/g1/g1StringDedupQueue.hpp"
+#include "memory/gcLocker.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "utilities/stack.inline.hpp"
+
+G1StringDedupQueue* G1StringDedupQueue::_queue = NULL;
+const size_t        G1StringDedupQueue::_max_size = 1000000; // Max number of elements per queue
+const size_t        G1StringDedupQueue::_max_cache_size = 0; // Max cache size per queue
+
+G1StringDedupQueue::G1StringDedupQueue() :
+  _cursor(0),
+  _empty(true),
+  _dropped(0) {
+  _nqueues = MAX2(ParallelGCThreads, (size_t)1);
+  _queues = NEW_C_HEAP_ARRAY(G1StringDedupWorkerQueue, _nqueues, mtGC);
+  for (size_t i = 0; i < _nqueues; i++) {
+    new (_queues + i) G1StringDedupWorkerQueue(G1StringDedupWorkerQueue::default_segment_size(), _max_cache_size, _max_size);
+  }
+}
+
+G1StringDedupQueue::~G1StringDedupQueue() {
+  ShouldNotReachHere();
+}
+
+void G1StringDedupQueue::create() {
+  assert(_queue == NULL, "One string deduplication queue allowed");
+  _queue = new G1StringDedupQueue();
+}
+
+void G1StringDedupQueue::wait() {
+  MonitorLockerEx ml(StringDedupQueue_lock, Mutex::_no_safepoint_check_flag);
+  while (_queue->_empty) {
+    ml.wait(Mutex::_no_safepoint_check_flag);
+  }
+}
+
+void G1StringDedupQueue::push(uint worker_id, oop java_string) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint");
+  assert(worker_id < _queue->_nqueues, "Invalid queue");
+
+  // Push and notify waiter
+  G1StringDedupWorkerQueue& worker_queue = _queue->_queues[worker_id];
+  if (!worker_queue.is_full()) {
+    worker_queue.push(java_string);
+    if (_queue->_empty) {
+      MonitorLockerEx ml(StringDedupQueue_lock, Mutex::_no_safepoint_check_flag);
+      if (_queue->_empty) {
+        // Mark non-empty and notify waiter
+        _queue->_empty = false;
+        ml.notify();
+      }
+    }
+  } else {
+    // Queue is full, drop the string and update the statistics
+    Atomic::inc_ptr(&_queue->_dropped);
+  }
+}
+
+oop G1StringDedupQueue::pop() {
+  assert(!SafepointSynchronize::is_at_safepoint(), "Must not be at safepoint");
+  No_Safepoint_Verifier nsv;
+
+  // Try all queues before giving up
+  for (size_t tries = 0; tries < _queue->_nqueues; tries++) {
+    // The cursor indicates where we left of last time
+    G1StringDedupWorkerQueue* queue = &_queue->_queues[_queue->_cursor];
+    while (!queue->is_empty()) {
+      oop obj = queue->pop();
+      // The oop we pop can be NULL if it was marked
+      // dead. Just ignore those and pop the next oop.
+      if (obj != NULL) {
+        return obj;
+      }
+    }
+
+    // Try next queue
+    _queue->_cursor = (_queue->_cursor + 1) % _queue->_nqueues;
+  }
+
+  // Mark empty
+  _queue->_empty = true;
+
+  return NULL;
+}
+
+void G1StringDedupQueue::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl) {
+  // A worker thread first claims a queue, which ensures exclusive
+  // access to that queue, then continues to process it.
+  for (;;) {
+    // Grab next queue to scan
+    size_t queue = cl->claim_queue();
+    if (queue >= _queue->_nqueues) {
+      // End of queues
+      break;
+    }
+
+    // Scan the queue
+    unlink_or_oops_do(cl, queue);
+  }
+}
+
+void G1StringDedupQueue::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, size_t queue) {
+  assert(queue < _queue->_nqueues, "Invalid queue");
+  StackIterator<oop, mtGC> iter(_queue->_queues[queue]);
+  while (!iter.is_empty()) {
+    oop* p = iter.next_addr();
+    if (*p != NULL) {
+      if (cl->is_alive(*p)) {
+        cl->keep_alive(p);
+      } else {
+        // Clear dead reference
+        *p = NULL;
+      }
+    }
+  }
+}
+
+void G1StringDedupQueue::print_statistics(outputStream* st) {
+  st->print_cr(
+    "   [Queue]\n"
+    "      [Dropped: "UINTX_FORMAT"]", _queue->_dropped);
+}
+
+void G1StringDedupQueue::verify() {
+  for (size_t i = 0; i < _queue->_nqueues; i++) {
+    StackIterator<oop, mtGC> iter(_queue->_queues[i]);
+    while (!iter.is_empty()) {
+      oop obj = iter.next();
+      if (obj != NULL) {
+        guarantee(Universe::heap()->is_in_reserved(obj), "Object must be on the heap");
+        guarantee(!obj->is_forwarded(), "Object must not be forwarded");
+        guarantee(java_lang_String::is_instance(obj), "Object must be a String");
+      }
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1StringDedupQueue.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPQUEUE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPQUEUE_HPP
+
+#include "memory/allocation.hpp"
+#include "oops/oop.hpp"
+#include "utilities/stack.hpp"
+
+class G1StringDedupUnlinkOrOopsDoClosure;
+
+//
+// The deduplication queue acts as the communication channel between the stop-the-world
+// mark/evacuation phase and the concurrent deduplication phase. Deduplication candidates
+// found during mark/evacuation are placed on this queue for later processing in the
+// deduplication thread. A queue entry is an oop pointing to a String object (as opposed
+// to entries in the deduplication hashtable which points to character arrays).
+//
+// While users of the queue treat it as a single queue, it is implemented as a set of
+// queues, one queue per GC worker thread, to allow lock-free and cache-friendly enqueue
+// operations by the GC workers.
+//
+// The oops in the queue are treated as weak pointers, meaning the objects they point to
+// can become unreachable and pruned (cleared) before being popped by the deduplication
+// thread.
+//
+// Pushing to the queue is thread safe (this relies on each thread using a unique worker
+// id), but only allowed during a safepoint. Popping from the queue is NOT thread safe
+// and can only be done by the deduplication thread outside a safepoint.
+//
+// The StringDedupQueue_lock is only used for blocking and waking up the deduplication
+// thread in case the queue is empty or becomes non-empty, respectively. This lock does
+// not otherwise protect the queue content.
+//
+class G1StringDedupQueue : public CHeapObj<mtGC> {
+private:
+  typedef Stack<oop, mtGC> G1StringDedupWorkerQueue;
+
+  static G1StringDedupQueue* _queue;
+  static const size_t        _max_size;
+  static const size_t        _max_cache_size;
+
+  G1StringDedupWorkerQueue*  _queues;
+  size_t                     _nqueues;
+  size_t                     _cursor;
+  volatile bool              _empty;
+
+  // Statistics counter, only used for logging.
+  uintx                      _dropped;
+
+  G1StringDedupQueue();
+  ~G1StringDedupQueue();
+
+  static void unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, size_t queue);
+
+public:
+  static void create();
+
+  // Blocks and waits for the queue to become non-empty.
+  static void wait();
+
+  // Pushes a deduplication candidate onto a specific GC worker queue.
+  static void push(uint worker_id, oop java_string);
+
+  // Pops a deduplication candidate from any queue, returns NULL if
+  // all queues are empty.
+  static oop pop();
+
+  static void unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl);
+
+  static void print_statistics(outputStream* st);
+  static void verify();
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPQUEUE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1StringDedupStat.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/g1StringDedupStat.hpp"
+
+G1StringDedupStat::G1StringDedupStat() :
+  _inspected(0),
+  _skipped(0),
+  _hashed(0),
+  _known(0),
+  _new(0),
+  _new_bytes(0),
+  _deduped(0),
+  _deduped_bytes(0),
+  _deduped_young(0),
+  _deduped_young_bytes(0),
+  _deduped_old(0),
+  _deduped_old_bytes(0),
+  _idle(0),
+  _exec(0),
+  _block(0),
+  _start(0.0),
+  _idle_elapsed(0.0),
+  _exec_elapsed(0.0),
+  _block_elapsed(0.0) {
+}
+
+void G1StringDedupStat::add(const G1StringDedupStat& stat) {
+  _inspected           += stat._inspected;
+  _skipped             += stat._skipped;
+  _hashed              += stat._hashed;
+  _known               += stat._known;
+  _new                 += stat._new;
+  _new_bytes           += stat._new_bytes;
+  _deduped             += stat._deduped;
+  _deduped_bytes       += stat._deduped_bytes;
+  _deduped_young       += stat._deduped_young;
+  _deduped_young_bytes += stat._deduped_young_bytes;
+  _deduped_old         += stat._deduped_old;
+  _deduped_old_bytes   += stat._deduped_old_bytes;
+  _idle                += stat._idle;
+  _exec                += stat._exec;
+  _block               += stat._block;
+  _idle_elapsed        += stat._idle_elapsed;
+  _exec_elapsed        += stat._exec_elapsed;
+  _block_elapsed       += stat._block_elapsed;
+}
+
+void G1StringDedupStat::print_summary(outputStream* st, const G1StringDedupStat& last_stat, const G1StringDedupStat& total_stat) {
+  double total_deduped_bytes_percent = 0.0;
+
+  if (total_stat._new_bytes > 0) {
+    // Avoid division by zero
+    total_deduped_bytes_percent = (double)total_stat._deduped_bytes / (double)total_stat._new_bytes * 100.0;
+  }
+
+  st->date_stamp(PrintGCDateStamps);
+  st->stamp(PrintGCTimeStamps);
+  st->print_cr(
+    "[GC concurrent-string-deduplication, "
+    G1_STRDEDUP_BYTES_FORMAT_NS"->"G1_STRDEDUP_BYTES_FORMAT_NS"("G1_STRDEDUP_BYTES_FORMAT_NS"), avg "
+    G1_STRDEDUP_PERCENT_FORMAT_NS", "G1_STRDEDUP_TIME_FORMAT"]",
+    G1_STRDEDUP_BYTES_PARAM(last_stat._new_bytes),
+    G1_STRDEDUP_BYTES_PARAM(last_stat._new_bytes - last_stat._deduped_bytes),
+    G1_STRDEDUP_BYTES_PARAM(last_stat._deduped_bytes),
+    total_deduped_bytes_percent,
+    last_stat._exec_elapsed);
+}
+
+void G1StringDedupStat::print_statistics(outputStream* st, const G1StringDedupStat& stat, bool total) {
+  double young_percent               = 0.0;
+  double old_percent                 = 0.0;
+  double skipped_percent             = 0.0;
+  double hashed_percent              = 0.0;
+  double known_percent               = 0.0;
+  double new_percent                 = 0.0;
+  double deduped_percent             = 0.0;
+  double deduped_bytes_percent       = 0.0;
+  double deduped_young_percent       = 0.0;
+  double deduped_young_bytes_percent = 0.0;
+  double deduped_old_percent         = 0.0;
+  double deduped_old_bytes_percent   = 0.0;
+
+  if (stat._inspected > 0) {
+    // Avoid division by zero
+    skipped_percent = (double)stat._skipped / (double)stat._inspected * 100.0;
+    hashed_percent  = (double)stat._hashed / (double)stat._inspected * 100.0;
+    known_percent   = (double)stat._known / (double)stat._inspected * 100.0;
+    new_percent     = (double)stat._new / (double)stat._inspected * 100.0;
+  }
+
+  if (stat._new > 0) {
+    // Avoid division by zero
+    deduped_percent = (double)stat._deduped / (double)stat._new * 100.0;
+  }
+
+  if (stat._deduped > 0) {
+    // Avoid division by zero
+    deduped_young_percent = (double)stat._deduped_young / (double)stat._deduped * 100.0;
+    deduped_old_percent   = (double)stat._deduped_old / (double)stat._deduped * 100.0;
+  }
+
+  if (stat._new_bytes > 0) {
+    // Avoid division by zero
+    deduped_bytes_percent = (double)stat._deduped_bytes / (double)stat._new_bytes * 100.0;
+  }
+
+  if (stat._deduped_bytes > 0) {
+    // Avoid division by zero
+    deduped_young_bytes_percent = (double)stat._deduped_young_bytes / (double)stat._deduped_bytes * 100.0;
+    deduped_old_bytes_percent   = (double)stat._deduped_old_bytes / (double)stat._deduped_bytes * 100.0;
+  }
+
+  if (total) {
+    st->print_cr(
+      "   [Total Exec: "UINTX_FORMAT"/"G1_STRDEDUP_TIME_FORMAT", Idle: "UINTX_FORMAT"/"G1_STRDEDUP_TIME_FORMAT", Blocked: "UINTX_FORMAT"/"G1_STRDEDUP_TIME_FORMAT"]",
+      stat._exec, stat._exec_elapsed, stat._idle, stat._idle_elapsed, stat._block, stat._block_elapsed);
+  } else {
+    st->print_cr(
+      "   [Last Exec: "G1_STRDEDUP_TIME_FORMAT", Idle: "G1_STRDEDUP_TIME_FORMAT", Blocked: "UINTX_FORMAT"/"G1_STRDEDUP_TIME_FORMAT"]",
+      stat._exec_elapsed, stat._idle_elapsed, stat._block, stat._block_elapsed);
+  }
+  st->print_cr(
+    "      [Inspected:    "G1_STRDEDUP_OBJECTS_FORMAT"]\n"
+    "         [Skipped:   "G1_STRDEDUP_OBJECTS_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT")]\n"
+    "         [Hashed:    "G1_STRDEDUP_OBJECTS_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT")]\n"
+    "         [Known:     "G1_STRDEDUP_OBJECTS_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT")]\n"
+    "         [New:       "G1_STRDEDUP_OBJECTS_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT") "G1_STRDEDUP_BYTES_FORMAT"]\n"
+    "      [Deduplicated: "G1_STRDEDUP_OBJECTS_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT") "G1_STRDEDUP_BYTES_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT")]\n"
+    "         [Young:     "G1_STRDEDUP_OBJECTS_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT") "G1_STRDEDUP_BYTES_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT")]\n"
+    "         [Old:       "G1_STRDEDUP_OBJECTS_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT") "G1_STRDEDUP_BYTES_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT")]",
+    stat._inspected,
+    stat._skipped, skipped_percent,
+    stat._hashed, hashed_percent,
+    stat._known, known_percent,
+    stat._new, new_percent, G1_STRDEDUP_BYTES_PARAM(stat._new_bytes),
+    stat._deduped, deduped_percent, G1_STRDEDUP_BYTES_PARAM(stat._deduped_bytes), deduped_bytes_percent,
+    stat._deduped_young, deduped_young_percent, G1_STRDEDUP_BYTES_PARAM(stat._deduped_young_bytes), deduped_young_bytes_percent,
+    stat._deduped_old, deduped_old_percent, G1_STRDEDUP_BYTES_PARAM(stat._deduped_old_bytes), deduped_old_bytes_percent);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1StringDedupStat.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPSTAT_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPSTAT_HPP
+
+#include "memory/allocation.hpp"
+#include "runtime/os.hpp"
+
+// Macros for GC log output formating
+#define G1_STRDEDUP_OBJECTS_FORMAT         UINTX_FORMAT_W(12)
+#define G1_STRDEDUP_TIME_FORMAT            "%1.7lf secs"
+#define G1_STRDEDUP_PERCENT_FORMAT         "%5.1lf%%"
+#define G1_STRDEDUP_PERCENT_FORMAT_NS      "%.1lf%%"
+#define G1_STRDEDUP_BYTES_FORMAT           "%8.1lf%s"
+#define G1_STRDEDUP_BYTES_FORMAT_NS        "%.1lf%s"
+#define G1_STRDEDUP_BYTES_PARAM(bytes)     byte_size_in_proper_unit((double)(bytes)), proper_unit_for_byte_size((bytes))
+
+//
+// Statistics gathered by the deduplication thread.
+//
+class G1StringDedupStat : public StackObj {
+private:
+  // Counters
+  uintx  _inspected;
+  uintx  _skipped;
+  uintx  _hashed;
+  uintx  _known;
+  uintx  _new;
+  uintx  _new_bytes;
+  uintx  _deduped;
+  uintx  _deduped_bytes;
+  uintx  _deduped_young;
+  uintx  _deduped_young_bytes;
+  uintx  _deduped_old;
+  uintx  _deduped_old_bytes;
+  uintx  _idle;
+  uintx  _exec;
+  uintx  _block;
+
+  // Time spent by the deduplication thread in different phases
+  double _start;
+  double _idle_elapsed;
+  double _exec_elapsed;
+  double _block_elapsed;
+
+public:
+  G1StringDedupStat();
+
+  void inc_inspected() {
+    _inspected++;
+  }
+
+  void inc_skipped() {
+    _skipped++;
+  }
+
+  void inc_hashed() {
+    _hashed++;
+  }
+
+  void inc_known() {
+    _known++;
+  }
+
+  void inc_new(uintx bytes) {
+    _new++;
+    _new_bytes += bytes;
+  }
+
+  void inc_deduped_young(uintx bytes) {
+    _deduped++;
+    _deduped_bytes += bytes;
+    _deduped_young++;
+    _deduped_young_bytes += bytes;
+  }
+
+  void inc_deduped_old(uintx bytes) {
+    _deduped++;
+    _deduped_bytes += bytes;
+    _deduped_old++;
+    _deduped_old_bytes += bytes;
+  }
+
+  void mark_idle() {
+    _start = os::elapsedTime();
+    _idle++;
+  }
+
+  void mark_exec() {
+    double now = os::elapsedTime();
+    _idle_elapsed = now - _start;
+    _start = now;
+    _exec++;
+  }
+
+  void mark_block() {
+    double now = os::elapsedTime();
+    _exec_elapsed += now - _start;
+    _start = now;
+    _block++;
+  }
+
+  void mark_unblock() {
+    double now = os::elapsedTime();
+    _block_elapsed += now - _start;
+    _start = now;
+  }
+
+  void mark_done() {
+    double now = os::elapsedTime();
+    _exec_elapsed += now - _start;
+  }
+
+  void add(const G1StringDedupStat& stat);
+
+  static void print_summary(outputStream* st, const G1StringDedupStat& last_stat, const G1StringDedupStat& total_stat);
+  static void print_statistics(outputStream* st, const G1StringDedupStat& stat, bool total);
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPSTAT_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1StringDedupTable.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,569 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "classfile/altHashing.hpp"
+#include "classfile/javaClasses.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc_implementation/g1/g1StringDedupTable.hpp"
+#include "memory/gcLocker.hpp"
+#include "memory/padded.inline.hpp"
+#include "oops/typeArrayOop.hpp"
+#include "runtime/mutexLocker.hpp"
+
+//
+// Freelist in the deduplication table entry cache. Links table
+// entries together using their _next fields.
+//
+class G1StringDedupEntryFreeList : public CHeapObj<mtGC> {
+private:
+  G1StringDedupEntry* _list;
+  size_t              _length;
+
+public:
+  G1StringDedupEntryFreeList() :
+    _list(NULL),
+    _length(0) {
+  }
+
+  void add(G1StringDedupEntry* entry) {
+    entry->set_next(_list);
+    _list = entry;
+    _length++;
+  }
+
+  G1StringDedupEntry* remove() {
+    G1StringDedupEntry* entry = _list;
+    if (entry != NULL) {
+      _list = entry->next();
+      _length--;
+    }
+    return entry;
+  }
+
+  size_t length() {
+    return _length;
+  }
+};
+
+//
+// Cache of deduplication table entries. This cache provides fast allocation and
+// reuse of table entries to lower the pressure on the underlying allocator.
+// But more importantly, it provides fast/deferred freeing of table entries. This
+// is important because freeing of table entries is done during stop-the-world
+// phases and it is not uncommon for large number of entries to be freed at once.
+// Tables entries that are freed during these phases are placed onto a freelist in
+// the cache. The deduplication thread, which executes in a concurrent phase, will
+// later reuse or free the underlying memory for these entries.
+//
+// The cache allows for single-threaded allocations and multi-threaded frees.
+// Allocations are synchronized by StringDedupTable_lock as part of a table
+// modification.
+//
+class G1StringDedupEntryCache : public CHeapObj<mtGC> {
+private:
+  // One freelist per GC worker to allow lock less freeing of
+  // entries while doing a parallel scan of the table. Using
+  // PaddedEnd to avoid false sharing.
+  PaddedEnd<G1StringDedupEntryFreeList>* _lists;
+  size_t                                 _nlists;
+
+public:
+  G1StringDedupEntryCache();
+  ~G1StringDedupEntryCache();
+
+  // Get a table entry from the cache freelist, or allocate a new
+  // entry if the cache is empty.
+  G1StringDedupEntry* alloc();
+
+  // Insert a table entry into the cache freelist.
+  void free(G1StringDedupEntry* entry, uint worker_id);
+
+  // Returns current number of entries in the cache.
+  size_t size();
+
+  // If the cache has grown above the given max size, trim it down
+  // and deallocate the memory occupied by trimmed of entries.
+  void trim(size_t max_size);
+};
+
+G1StringDedupEntryCache::G1StringDedupEntryCache() {
+  _nlists = MAX2(ParallelGCThreads, (size_t)1);
+  _lists = PaddedArray<G1StringDedupEntryFreeList, mtGC>::create_unfreeable((uint)_nlists);
+}
+
+G1StringDedupEntryCache::~G1StringDedupEntryCache() {
+  ShouldNotReachHere();
+}
+
+G1StringDedupEntry* G1StringDedupEntryCache::alloc() {
+  for (size_t i = 0; i < _nlists; i++) {
+    G1StringDedupEntry* entry = _lists[i].remove();
+    if (entry != NULL) {
+      return entry;
+    }
+  }
+  return new G1StringDedupEntry();
+}
+
+void G1StringDedupEntryCache::free(G1StringDedupEntry* entry, uint worker_id) {
+  assert(entry->obj() != NULL, "Double free");
+  assert(worker_id < _nlists, "Invalid worker id");
+  entry->set_obj(NULL);
+  entry->set_hash(0);
+  _lists[worker_id].add(entry);
+}
+
+size_t G1StringDedupEntryCache::size() {
+  size_t size = 0;
+  for (size_t i = 0; i < _nlists; i++) {
+    size += _lists[i].length();
+  }
+  return size;
+}
+
+void G1StringDedupEntryCache::trim(size_t max_size) {
+  size_t cache_size = 0;
+  for (size_t i = 0; i < _nlists; i++) {
+    G1StringDedupEntryFreeList* list = &_lists[i];
+    cache_size += list->length();
+    while (cache_size > max_size) {
+      G1StringDedupEntry* entry = list->remove();
+      assert(entry != NULL, "Should not be null");
+      cache_size--;
+      delete entry;
+    }
+  }
+}
+
+G1StringDedupTable*      G1StringDedupTable::_table = NULL;
+G1StringDedupEntryCache* G1StringDedupTable::_entry_cache = NULL;
+
+const size_t             G1StringDedupTable::_min_size = (1 << 10);   // 1024
+const size_t             G1StringDedupTable::_max_size = (1 << 24);   // 16777216
+const double             G1StringDedupTable::_grow_load_factor = 2.0; // Grow table at 200% load
+const double             G1StringDedupTable::_shrink_load_factor = _grow_load_factor / 3.0; // Shrink table at 67% load
+const double             G1StringDedupTable::_max_cache_factor = 0.1; // Cache a maximum of 10% of the table size
+const uintx              G1StringDedupTable::_rehash_multiple = 60;   // Hash bucket has 60 times more collisions than expected
+const uintx              G1StringDedupTable::_rehash_threshold = (uintx)(_rehash_multiple * _grow_load_factor);
+
+uintx                    G1StringDedupTable::_entries_added = 0;
+uintx                    G1StringDedupTable::_entries_removed = 0;
+uintx                    G1StringDedupTable::_resize_count = 0;
+uintx                    G1StringDedupTable::_rehash_count = 0;
+
+G1StringDedupTable::G1StringDedupTable(size_t size, jint hash_seed) :
+  _size(size),
+  _entries(0),
+  _grow_threshold((uintx)(size * _grow_load_factor)),
+  _shrink_threshold((uintx)(size * _shrink_load_factor)),
+  _rehash_needed(false),
+  _hash_seed(hash_seed) {
+  assert(is_power_of_2(size), "Table size must be a power of 2");
+  _buckets = NEW_C_HEAP_ARRAY(G1StringDedupEntry*, _size, mtGC);
+  memset(_buckets, 0, _size * sizeof(G1StringDedupEntry*));
+}
+
+G1StringDedupTable::~G1StringDedupTable() {
+  FREE_C_HEAP_ARRAY(G1StringDedupEntry*, _buckets, mtGC);
+}
+
+void G1StringDedupTable::create() {
+  assert(_table == NULL, "One string deduplication table allowed");
+  _entry_cache = new G1StringDedupEntryCache();
+  _table = new G1StringDedupTable(_min_size);
+}
+
+void G1StringDedupTable::add(typeArrayOop value, unsigned int hash, G1StringDedupEntry** list) {
+  G1StringDedupEntry* entry = _entry_cache->alloc();
+  entry->set_obj(value);
+  entry->set_hash(hash);
+  entry->set_next(*list);
+  *list = entry;
+  _entries++;
+}
+
+void G1StringDedupTable::remove(G1StringDedupEntry** pentry, uint worker_id) {
+  G1StringDedupEntry* entry = *pentry;
+  *pentry = entry->next();
+  _entry_cache->free(entry, worker_id);
+}
+
+void G1StringDedupTable::transfer(G1StringDedupEntry** pentry, G1StringDedupTable* dest) {
+  G1StringDedupEntry* entry = *pentry;
+  *pentry = entry->next();
+  unsigned int hash = entry->hash();
+  size_t index = dest->hash_to_index(hash);
+  G1StringDedupEntry** list = dest->bucket(index);
+  entry->set_next(*list);
+  *list = entry;
+}
+
+bool G1StringDedupTable::equals(typeArrayOop value1, typeArrayOop value2) {
+  return (value1 == value2 ||
+          (value1->length() == value2->length() &&
+           (!memcmp(value1->base(T_CHAR),
+                    value2->base(T_CHAR),
+                    value1->length() * sizeof(jchar)))));
+}
+
+typeArrayOop G1StringDedupTable::lookup(typeArrayOop value, unsigned int hash,
+                                        G1StringDedupEntry** list, uintx &count) {
+  for (G1StringDedupEntry* entry = *list; entry != NULL; entry = entry->next()) {
+    if (entry->hash() == hash) {
+      typeArrayOop existing_value = entry->obj();
+      if (equals(value, existing_value)) {
+        // Match found
+        return existing_value;
+      }
+    }
+    count++;
+  }
+
+  // Not found
+  return NULL;
+}
+
+typeArrayOop G1StringDedupTable::lookup_or_add_inner(typeArrayOop value, unsigned int hash) {
+  size_t index = hash_to_index(hash);
+  G1StringDedupEntry** list = bucket(index);
+  uintx count = 0;
+
+  // Lookup in list
+  typeArrayOop existing_value = lookup(value, hash, list, count);
+
+  // Check if rehash is needed
+  if (count > _rehash_threshold) {
+    _rehash_needed = true;
+  }
+
+  if (existing_value == NULL) {
+    // Not found, add new entry
+    add(value, hash, list);
+
+    // Update statistics
+    _entries_added++;
+  }
+
+  return existing_value;
+}
+
+unsigned int G1StringDedupTable::hash_code(typeArrayOop value) {
+  unsigned int hash;
+  int length = value->length();
+  const jchar* data = (jchar*)value->base(T_CHAR);
+
+  if (use_java_hash()) {
+    hash = java_lang_String::hash_code(data, length);
+  } else {
+    hash = AltHashing::murmur3_32(_table->_hash_seed, data, length);
+  }
+
+  return hash;
+}
+
+void G1StringDedupTable::deduplicate(oop java_string, G1StringDedupStat& stat) {
+  assert(java_lang_String::is_instance(java_string), "Must be a string");
+  No_Safepoint_Verifier nsv;
+
+  stat.inc_inspected();
+
+  typeArrayOop value = java_lang_String::value(java_string);
+  if (value == NULL) {
+    // String has no value
+    stat.inc_skipped();
+    return;
+  }
+
+  unsigned int hash = 0;
+
+  if (use_java_hash()) {
+    // Get hash code from cache
+    hash = java_lang_String::hash(java_string);
+  }
+
+  if (hash == 0) {
+    // Compute hash
+    hash = hash_code(value);
+    stat.inc_hashed();
+  }
+
+  if (use_java_hash() && hash != 0) {
+    // Store hash code in cache
+    java_lang_String::set_hash(java_string, hash);
+  }
+
+  typeArrayOop existing_value = lookup_or_add(value, hash);
+  if (existing_value == value) {
+    // Same value, already known
+    stat.inc_known();
+    return;
+  }
+
+  // Get size of value array
+  uintx size_in_bytes = value->size() * HeapWordSize;
+  stat.inc_new(size_in_bytes);
+
+  if (existing_value != NULL) {
+    // Enqueue the reference to make sure it is kept alive. Concurrent mark might
+    // otherwise declare it dead if there are no other strong references to this object.
+    G1SATBCardTableModRefBS::enqueue(existing_value);
+
+    // Existing value found, deduplicate string
+    java_lang_String::set_value(java_string, existing_value);
+
+    if (G1CollectedHeap::heap()->is_in_young(value)) {
+      stat.inc_deduped_young(size_in_bytes);
+    } else {
+      stat.inc_deduped_old(size_in_bytes);
+    }
+  }
+}
+
+G1StringDedupTable* G1StringDedupTable::prepare_resize() {
+  size_t size = _table->_size;
+
+  // Check if the hashtable needs to be resized
+  if (_table->_entries > _table->_grow_threshold) {
+    // Grow table, double the size
+    size *= 2;
+    if (size > _max_size) {
+      // Too big, don't resize
+      return NULL;
+    }
+  } else if (_table->_entries < _table->_shrink_threshold) {
+    // Shrink table, half the size
+    size /= 2;
+    if (size < _min_size) {
+      // Too small, don't resize
+      return NULL;
+    }
+  } else if (StringDeduplicationResizeALot) {
+    // Force grow
+    size *= 2;
+    if (size > _max_size) {
+      // Too big, force shrink instead
+      size /= 4;
+    }
+  } else {
+    // Resize not needed
+    return NULL;
+  }
+
+  // Update statistics
+  _resize_count++;
+
+  // Allocate the new table. The new table will be populated by workers
+  // calling unlink_or_oops_do() and finally installed by finish_resize().
+  return new G1StringDedupTable(size, _table->_hash_seed);
+}
+
+void G1StringDedupTable::finish_resize(G1StringDedupTable* resized_table) {
+  assert(resized_table != NULL, "Invalid table");
+
+  resized_table->_entries = _table->_entries;
+
+  // Free old table
+  delete _table;
+
+  // Install new table
+  _table = resized_table;
+}
+
+void G1StringDedupTable::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, uint worker_id) {
+  // The table is divided into partitions to allow lock-less parallel processing by
+  // multiple worker threads. A worker thread first claims a partition, which ensures
+  // exclusive access to that part of the table, then continues to process it. To allow
+  // shrinking of the table in parallel we also need to make sure that the same worker
+  // thread processes all partitions where entries will hash to the same destination
+  // partition. Since the table size is always a power of two and we always shrink by
+  // dividing the table in half, we know that for a given partition there is only one
+  // other partition whoes entries will hash to the same destination partition. That
+  // other partition is always the sibling partition in the second half of the table.
+  // For example, if the table is divided into 8 partitions, the sibling of partition 0
+  // is partition 4, the sibling of partition 1 is partition 5, etc.
+  size_t table_half = _table->_size / 2;
+
+  // Let each partition be one page worth of buckets
+  size_t partition_size = MIN2(table_half, os::vm_page_size() / sizeof(G1StringDedupEntry*));
+  assert(table_half % partition_size == 0, "Invalid partition size");
+
+  // Number of entries removed during the scan
+  uintx removed = 0;
+
+  for (;;) {
+    // Grab next partition to scan
+    size_t partition_begin = cl->claim_table_partition(partition_size);
+    size_t partition_end = partition_begin + partition_size;
+    if (partition_begin >= table_half) {
+      // End of table
+      break;
+    }
+
+    // Scan the partition followed by the sibling partition in the second half of the table
+    removed += unlink_or_oops_do(cl, partition_begin, partition_end, worker_id);
+    removed += unlink_or_oops_do(cl, table_half + partition_begin, table_half + partition_end, worker_id);
+  }
+
+  // Delayed update avoid contention on the table lock
+  if (removed > 0) {
+    MutexLockerEx ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag);
+    _table->_entries -= removed;
+    _entries_removed += removed;
+  }
+}
+
+uintx G1StringDedupTable::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl,
+                                            size_t partition_begin,
+                                            size_t partition_end,
+                                            uint worker_id) {
+  uintx removed = 0;
+  for (size_t bucket = partition_begin; bucket < partition_end; bucket++) {
+    G1StringDedupEntry** entry = _table->bucket(bucket);
+    while (*entry != NULL) {
+      oop* p = (oop*)(*entry)->obj_addr();
+      if (cl->is_alive(*p)) {
+        cl->keep_alive(p);
+        if (cl->is_resizing()) {
+          // We are resizing the table, transfer entry to the new table
+          _table->transfer(entry, cl->resized_table());
+        } else {
+          if (cl->is_rehashing()) {
+            // We are rehashing the table, rehash the entry but keep it
+            // in the table. We can't transfer entries into the new table
+            // at this point since we don't have exclusive access to all
+            // destination partitions. finish_rehash() will do a single
+            // threaded transfer of all entries.
+            typeArrayOop value = (typeArrayOop)*p;
+            unsigned int hash = hash_code(value);
+            (*entry)->set_hash(hash);
+          }
+
+          // Move to next entry
+          entry = (*entry)->next_addr();
+        }
+      } else {
+        // Not alive, remove entry from table
+        _table->remove(entry, worker_id);
+        removed++;
+      }
+    }
+  }
+
+  return removed;
+}
+
+G1StringDedupTable* G1StringDedupTable::prepare_rehash() {
+  if (!_table->_rehash_needed && !StringDeduplicationRehashALot) {
+    // Rehash not needed
+    return NULL;
+  }
+
+  // Update statistics
+  _rehash_count++;
+
+  // Compute new hash seed
+  _table->_hash_seed = AltHashing::compute_seed();
+
+  // Allocate the new table, same size and hash seed
+  return new G1StringDedupTable(_table->_size, _table->_hash_seed);
+}
+
+void G1StringDedupTable::finish_rehash(G1StringDedupTable* rehashed_table) {
+  assert(rehashed_table != NULL, "Invalid table");
+
+  // Move all newly rehashed entries into the correct buckets in the new table
+  for (size_t bucket = 0; bucket < _table->_size; bucket++) {
+    G1StringDedupEntry** entry = _table->bucket(bucket);
+    while (*entry != NULL) {
+      _table->transfer(entry, rehashed_table);
+    }
+  }
+
+  rehashed_table->_entries = _table->_entries;
+
+  // Free old table
+  delete _table;
+
+  // Install new table
+  _table = rehashed_table;
+}
+
+void G1StringDedupTable::verify() {
+  for (size_t bucket = 0; bucket < _table->_size; bucket++) {
+    // Verify entries
+    G1StringDedupEntry** entry = _table->bucket(bucket);
+    while (*entry != NULL) {
+      typeArrayOop value = (*entry)->obj();
+      guarantee(value != NULL, "Object must not be NULL");
+      guarantee(Universe::heap()->is_in_reserved(value), "Object must be on the heap");
+      guarantee(!value->is_forwarded(), "Object must not be forwarded");
+      guarantee(value->is_typeArray(), "Object must be a typeArrayOop");
+      unsigned int hash = hash_code(value);
+      guarantee((*entry)->hash() == hash, "Table entry has inorrect hash");
+      guarantee(_table->hash_to_index(hash) == bucket, "Table entry has incorrect index");
+      entry = (*entry)->next_addr();
+    }
+
+    // Verify that we do not have entries with identical oops or identical arrays.
+    // We only need to compare entries in the same bucket. If the same oop or an
+    // identical array has been inserted more than once into different/incorrect
+    // buckets the verification step above will catch that.
+    G1StringDedupEntry** entry1 = _table->bucket(bucket);
+    while (*entry1 != NULL) {
+      typeArrayOop value1 = (*entry1)->obj();
+      G1StringDedupEntry** entry2 = (*entry1)->next_addr();
+      while (*entry2 != NULL) {
+        typeArrayOop value2 = (*entry2)->obj();
+        guarantee(!equals(value1, value2), "Table entries must not have identical arrays");
+        entry2 = (*entry2)->next_addr();
+      }
+      entry1 = (*entry1)->next_addr();
+    }
+  }
+}
+
+void G1StringDedupTable::trim_entry_cache() {
+  MutexLockerEx ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag);
+  size_t max_cache_size = (size_t)(_table->_size * _max_cache_factor);
+  _entry_cache->trim(max_cache_size);
+}
+
+void G1StringDedupTable::print_statistics(outputStream* st) {
+  st->print_cr(
+    "   [Table]\n"
+    "      [Memory Usage: "G1_STRDEDUP_BYTES_FORMAT_NS"]\n"
+    "      [Size: "SIZE_FORMAT", Min: "SIZE_FORMAT", Max: "SIZE_FORMAT"]\n"
+    "      [Entries: "UINTX_FORMAT", Load: "G1_STRDEDUP_PERCENT_FORMAT_NS", Cached: " UINTX_FORMAT ", Added: "UINTX_FORMAT", Removed: "UINTX_FORMAT"]\n"
+    "      [Resize Count: "UINTX_FORMAT", Shrink Threshold: "UINTX_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT_NS"), Grow Threshold: "UINTX_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT_NS")]\n"
+    "      [Rehash Count: "UINTX_FORMAT", Rehash Threshold: "UINTX_FORMAT", Hash Seed: 0x%x]\n"
+    "      [Age Threshold: "UINTX_FORMAT"]",
+    G1_STRDEDUP_BYTES_PARAM(_table->_size * sizeof(G1StringDedupEntry*) + (_table->_entries + _entry_cache->size()) * sizeof(G1StringDedupEntry)),
+    _table->_size, _min_size, _max_size,
+    _table->_entries, (double)_table->_entries / (double)_table->_size * 100.0, _entry_cache->size(), _entries_added, _entries_removed,
+    _resize_count, _table->_shrink_threshold, _shrink_load_factor * 100.0, _table->_grow_threshold, _grow_load_factor * 100.0,
+    _rehash_count, _rehash_threshold, _table->_hash_seed,
+    StringDeduplicationAgeThreshold);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1StringDedupTable.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTABLE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTABLE_HPP
+
+#include "gc_implementation/g1/g1StringDedupStat.hpp"
+#include "runtime/mutexLocker.hpp"
+
+class G1StringDedupEntryCache;
+
+//
+// Table entry in the deduplication hashtable. Points weakly to the
+// character array. Can be chained in a linked list in case of hash
+// collisions or when placed in a freelist in the entry cache.
+//
+class G1StringDedupEntry : public CHeapObj<mtGC> {
+private:
+  G1StringDedupEntry* _next;
+  unsigned int      _hash;
+  typeArrayOop      _obj;
+
+public:
+  G1StringDedupEntry() :
+    _next(NULL),
+    _hash(0),
+    _obj(NULL) {
+  }
+
+  G1StringDedupEntry* next() {
+    return _next;
+  }
+
+  G1StringDedupEntry** next_addr() {
+    return &_next;
+  }
+
+  void set_next(G1StringDedupEntry* next) {
+    _next = next;
+  }
+
+  unsigned int hash() {
+    return _hash;
+  }
+
+  void set_hash(unsigned int hash) {
+    _hash = hash;
+  }
+
+  typeArrayOop obj() {
+    return _obj;
+  }
+
+  typeArrayOop* obj_addr() {
+    return &_obj;
+  }
+
+  void set_obj(typeArrayOop obj) {
+    _obj = obj;
+  }
+};
+
+//
+// The deduplication hashtable keeps track of all unique character arrays used
+// by String objects. Each table entry weakly points to an character array, allowing
+// otherwise unreachable character arrays to be declared dead and pruned from the
+// table.
+//
+// The table is dynamically resized to accommodate the current number of table entries.
+// The table has hash buckets with chains for hash collision. If the average chain
+// length goes above or below given thresholds the table grows or shrinks accordingly.
+//
+// The table is also dynamically rehashed (using a new hash seed) if it becomes severely
+// unbalanced, i.e., a hash chain is significantly longer than average.
+//
+// All access to the table is protected by the StringDedupTable_lock, except under
+// safepoints in which case GC workers are allowed to access a table partitions they
+// have claimed without first acquiring the lock. Note however, that this applies only
+// the table partition (i.e. a range of elements in _buckets), not other parts of the
+// table such as the _entries field, statistics counters, etc.
+//
+class G1StringDedupTable : public CHeapObj<mtGC> {
+private:
+  // The currently active hashtable instance. Only modified when
+  // the table is resizes or rehashed.
+  static G1StringDedupTable*      _table;
+
+  // Cache for reuse and fast alloc/free of table entries.
+  static G1StringDedupEntryCache* _entry_cache;
+
+  G1StringDedupEntry**            _buckets;
+  size_t                          _size;
+  uintx                           _entries;
+  uintx                           _shrink_threshold;
+  uintx                           _grow_threshold;
+  bool                            _rehash_needed;
+
+  // The hash seed also dictates which hash function to use. A
+  // zero hash seed means we will use the Java compatible hash
+  // function (which doesn't use a seed), and a non-zero hash
+  // seed means we use the murmur3 hash function.
+  jint                            _hash_seed;
+
+  // Constants governing table resize/rehash/cache.
+  static const size_t             _min_size;
+  static const size_t             _max_size;
+  static const double             _grow_load_factor;
+  static const double             _shrink_load_factor;
+  static const uintx              _rehash_multiple;
+  static const uintx              _rehash_threshold;
+  static const double             _max_cache_factor;
+
+  // Table statistics, only used for logging.
+  static uintx                    _entries_added;
+  static uintx                    _entries_removed;
+  static uintx                    _resize_count;
+  static uintx                    _rehash_count;
+
+  G1StringDedupTable(size_t size, jint hash_seed = 0);
+  ~G1StringDedupTable();
+
+  // Returns the hash bucket at the given index.
+  G1StringDedupEntry** bucket(size_t index) {
+    return _buckets + index;
+  }
+
+  // Returns the hash bucket index for the given hash code.
+  size_t hash_to_index(unsigned int hash) {
+    return (size_t)hash & (_size - 1);
+  }
+
+  // Adds a new table entry to the given hash bucket.
+  void add(typeArrayOop value, unsigned int hash, G1StringDedupEntry** list);
+
+  // Removes the given table entry from the table.
+  void remove(G1StringDedupEntry** pentry, uint worker_id);
+
+  // Transfers a table entry from the current table to the destination table.
+  void transfer(G1StringDedupEntry** pentry, G1StringDedupTable* dest);
+
+  // Returns an existing character array in the given hash bucket, or NULL
+  // if no matching character array exists.
+  typeArrayOop lookup(typeArrayOop value, unsigned int hash,
+                      G1StringDedupEntry** list, uintx &count);
+
+  // Returns an existing character array in the table, or inserts a new
+  // table entry if no matching character array exists.
+  typeArrayOop lookup_or_add_inner(typeArrayOop value, unsigned int hash);
+
+  // Thread safe lookup or add of table entry
+  static typeArrayOop lookup_or_add(typeArrayOop value, unsigned int hash) {
+    // Protect the table from concurrent access. Also note that this lock
+    // acts as a fence for _table, which could have been replaced by a new
+    // instance if the table was resized or rehashed.
+    MutexLockerEx ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag);
+    return _table->lookup_or_add_inner(value, hash);
+  }
+
+  // Returns true if the hashtable is currently using a Java compatible
+  // hash function.
+  static bool use_java_hash() {
+    return _table->_hash_seed == 0;
+  }
+
+  static bool equals(typeArrayOop value1, typeArrayOop value2);
+
+  // Computes the hash code for the given character array, using the
+  // currently active hash function and hash seed.
+  static unsigned int hash_code(typeArrayOop value);
+
+  static uintx unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl,
+                                 size_t partition_begin,
+                                 size_t partition_end,
+                                 uint worker_id);
+
+public:
+  static void create();
+
+  // Deduplicates the given String object, or adds its backing
+  // character array to the deduplication hashtable.
+  static void deduplicate(oop java_string, G1StringDedupStat& stat);
+
+  // If a table resize is needed, returns a newly allocated empty
+  // hashtable of the proper size.
+  static G1StringDedupTable* prepare_resize();
+
+  // Installs a newly resized table as the currently active table
+  // and deletes the previously active table.
+  static void finish_resize(G1StringDedupTable* resized_table);
+
+  // If a table rehash is needed, returns a newly allocated empty
+  // hashtable and updates the hash seed.
+  static G1StringDedupTable* prepare_rehash();
+
+  // Transfers rehashed entries from the currently active table into
+  // the new table. Installs the new table as the currently active table
+  // and deletes the previously active table.
+  static void finish_rehash(G1StringDedupTable* rehashed_table);
+
+  // If the table entry cache has grown too large, trim it down according to policy
+  static void trim_entry_cache();
+
+  static void unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, uint worker_id);
+
+  static void print_statistics(outputStream* st);
+  static void verify();
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTABLE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1StringDedupThread.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/g1Log.hpp"
+#include "gc_implementation/g1/g1StringDedup.hpp"
+#include "gc_implementation/g1/g1StringDedupTable.hpp"
+#include "gc_implementation/g1/g1StringDedupThread.hpp"
+#include "gc_implementation/g1/g1StringDedupQueue.hpp"
+
+G1StringDedupThread* G1StringDedupThread::_thread = NULL;
+
+G1StringDedupThread::G1StringDedupThread() :
+  ConcurrentGCThread() {
+  set_name("String Deduplication Thread");
+  create_and_start();
+}
+
+G1StringDedupThread::~G1StringDedupThread() {
+  ShouldNotReachHere();
+}
+
+void G1StringDedupThread::create() {
+  assert(G1StringDedup::is_enabled(), "String deduplication not enabled");
+  assert(_thread == NULL, "One string deduplication thread allowed");
+  _thread = new G1StringDedupThread();
+}
+
+G1StringDedupThread* G1StringDedupThread::thread() {
+  assert(G1StringDedup::is_enabled(), "String deduplication not enabled");
+  assert(_thread != NULL, "String deduplication thread not created");
+  return _thread;
+}
+
+void G1StringDedupThread::print_on(outputStream* st) const {
+  st->print("\"%s\" ", name());
+  Thread::print_on(st);
+  st->cr();
+}
+
+void G1StringDedupThread::run() {
+  G1StringDedupStat total_stat;
+
+  initialize_in_thread();
+  wait_for_universe_init();
+
+  // Main loop
+  for (;;) {
+    G1StringDedupStat stat;
+
+    stat.mark_idle();
+
+    // Wait for the queue to become non-empty
+    G1StringDedupQueue::wait();
+
+    // Include this thread in safepoints
+    stsJoin();
+
+    stat.mark_exec();
+
+    // Process the queue
+    for (;;) {
+      oop java_string = G1StringDedupQueue::pop();
+      if (java_string == NULL) {
+        break;
+      }
+
+      G1StringDedupTable::deduplicate(java_string, stat);
+
+      // Safepoint this thread if needed
+      if (stsShouldYield()) {
+        stat.mark_block();
+        stsYield(NULL);
+        stat.mark_unblock();
+      }
+    }
+
+    G1StringDedupTable::trim_entry_cache();
+
+    stat.mark_done();
+
+    // Print statistics
+    total_stat.add(stat);
+    print(gclog_or_tty, stat, total_stat);
+
+    // Exclude this thread from safepoints
+    stsLeave();
+  }
+
+  ShouldNotReachHere();
+}
+
+void G1StringDedupThread::print(outputStream* st, const G1StringDedupStat& last_stat, const G1StringDedupStat& total_stat) {
+  if (G1Log::fine() || PrintStringDeduplicationStatistics) {
+    G1StringDedupStat::print_summary(st, last_stat, total_stat);
+    if (PrintStringDeduplicationStatistics) {
+      G1StringDedupStat::print_statistics(st, last_stat, false);
+      G1StringDedupStat::print_statistics(st, total_stat, true);
+      G1StringDedupTable::print_statistics(st);
+      G1StringDedupQueue::print_statistics(st);
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1StringDedupThread.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTHREAD_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTHREAD_HPP
+
+#include "gc_implementation/g1/g1StringDedupStat.hpp"
+#include "gc_implementation/shared/concurrentGCThread.hpp"
+
+//
+// The deduplication thread is where the actual deduplication occurs. It waits for
+// deduplication candidates to appear on the deduplication queue, removes them from
+// the queue and tries to deduplicate them. It uses the deduplication hashtable to
+// find identical, already existing, character arrays on the heap. The thread runs
+// concurrently with the Java application but participates in safepoints to allow
+// the GC to adjust and unlink oops from the deduplication queue and table.
+//
+class G1StringDedupThread: public ConcurrentGCThread {
+private:
+  static G1StringDedupThread* _thread;
+
+  G1StringDedupThread();
+  ~G1StringDedupThread();
+
+  void print(outputStream* st, const G1StringDedupStat& last_stat, const G1StringDedupStat& total_stat);
+
+public:
+  static void create();
+  static G1StringDedupThread* thread();
+
+  virtual void run();
+  virtual void print_on(outputStream* st) const;
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTHREAD_HPP
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -285,6 +285,10 @@
   product(uintx, G1MixedGCCountTarget, 8,                                   \
           "The target number of mixed GCs after a marking cycle.")          \
                                                                             \
+  experimental(uintx, G1CodeRootsChunkCacheKeepPercent, 10,                 \
+          "The amount of code root chunks that should be kept at most "     \
+          "as percentage of already allocated.")                            \
+                                                                            \
   experimental(uintx, G1OldCSetRegionThresholdPercent, 10,                  \
           "An upper bound for the number of old CSet regions expressed "    \
           "as a percentage of the heap size.")                              \
--- a/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -43,8 +43,6 @@
 class G1ParScanClosure;
 class G1ParPushHeapRSClosure;

-typedef G1ParCopyClosure<G1BarrierEvac, false> G1ParScanHeapEvacClosure;
-
 class FilterIntoCSClosure;
 class FilterOutOfRegionClosure;
 class G1CMOopClosure;
@@ -61,7 +59,6 @@
 #endif

 #define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \
-      f(G1ParScanHeapEvacClosure,_nv)                   \
       f(G1ParScanClosure,_nv)                           \
       f(G1ParPushHeapRSClosure,_nv)                     \
       f(FilterIntoCSClosure,_nv)                        \
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -205,7 +205,7 @@
   init_top_at_mark_start();
 }

-void HeapRegion::hr_clear(bool par, bool clear_space) {
+void HeapRegion::hr_clear(bool par, bool clear_space, bool locked) {
   assert(_humongous_type == NotHumongous,
          "we should have already filtered out humongous regions");
   assert(_humongous_start_region == NULL,
@@ -223,7 +223,11 @@
   if (!par) {
     // If this is parallel, this will be done later.
     HeapRegionRemSet* hrrs = rem_set();
-    hrrs->clear();
+    if (locked) {
+      hrrs->clear_locked();
+    } else {
+      hrrs->clear();
+    }
     _claimed = InitialClaimValue;
   }
   zero_marked_bytes();
@@ -352,7 +356,7 @@
     _claimed(InitialClaimValue), _evacuation_failed(false),
     _prev_marked_bytes(0), _next_marked_bytes(0), _gc_efficiency(0.0),
     _young_type(NotYoung), _next_young_region(NULL),
-    _next_dirty_cards_region(NULL), _next(NULL), _pending_removal(false),
+    _next_dirty_cards_region(NULL), _next(NULL), _prev(NULL), _pending_removal(false),
 #ifdef ASSERT
     _containing_set(NULL),
 #endif // ASSERT
@@ -710,14 +714,14 @@
   }

   HeapRegionRemSet* hrrs = rem_set();
-  int strong_code_roots_length = hrrs->strong_code_roots_list_length();
+  size_t strong_code_roots_length = hrrs->strong_code_roots_list_length();

   // if this region is empty then there should be no entries
   // on its strong code root list
   if (is_empty()) {
     if (strong_code_roots_length > 0) {
       gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] is empty "
-                             "but has "INT32_FORMAT" code root entries",
+                             "but has "SIZE_FORMAT" code root entries",
                              bottom(), end(), strong_code_roots_length);
       *failures = true;
     }
@@ -727,7 +731,7 @@
   if (continuesHumongous()) {
     if (strong_code_roots_length > 0) {
       gclog_or_tty->print_cr("region "HR_FORMAT" is a continuation of a humongous "
-                             "region but has "INT32_FORMAT" code root entries",
+                             "region but has "SIZE_FORMAT" code root entries",
                              HR_FORMAT_PARAMS(this), strong_code_roots_length);
       *failures = true;
     }
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -271,6 +271,7 @@

   // Fields used by the HeapRegionSetBase class and subclasses.
   HeapRegion* _next;
+  HeapRegion* _prev;
 #ifdef ASSERT
   HeapRegionSetBase* _containing_set;
 #endif // ASSERT
@@ -531,11 +532,13 @@

   // Methods used by the HeapRegionSetBase class and subclasses.

-  // Getter and setter for the next field used to link regions into
+  // Getter and setter for the next and prev fields used to link regions into
   // linked lists.
   HeapRegion* next()              { return _next; }
+  HeapRegion* prev()              { return _prev; }

   void set_next(HeapRegion* next) { _next = next; }
+  void set_prev(HeapRegion* prev) { _prev = prev; }

   // Every region added to a set is tagged with a reference to that
   // set. This is used for doing consistency checking to make sure that
@@ -596,7 +599,7 @@
   void save_marks();

   // Reset HR stuff to default values.
-  void hr_clear(bool par, bool clear_space);
+  void hr_clear(bool par, bool clear_space, bool locked = false);
   void par_clear();

   // Get the start of the unmarked area in this region.
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,7 @@
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "memory/allocation.hpp"
+#include "memory/padded.inline.hpp"
 #include "memory/space.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "utilities/bitMap.inline.hpp"
@@ -259,10 +260,9 @@
 size_t OtherRegionsTable::_fine_eviction_stride = 0;
 size_t OtherRegionsTable::_fine_eviction_sample_size = 0;

-OtherRegionsTable::OtherRegionsTable(HeapRegion* hr) :
+OtherRegionsTable::OtherRegionsTable(HeapRegion* hr, Mutex* m) :
   _g1h(G1CollectedHeap::heap()),
-  _m(Mutex::leaf, "An OtherRegionsTable lock", true),
-  _hr(hr),
+  _hr(hr), _m(m),
   _coarse_map(G1CollectedHeap::heap()->max_regions(),
               false /* in-resource-area */),
   _fine_grain_regions(NULL),
@@ -358,46 +358,66 @@
          "just checking");
 }

-int**  OtherRegionsTable::_from_card_cache = NULL;
-size_t OtherRegionsTable::_from_card_cache_max_regions = 0;
-size_t OtherRegionsTable::_from_card_cache_mem_size = 0;
+int**  FromCardCache::_cache = NULL;
+uint   FromCardCache::_max_regions = 0;
+size_t FromCardCache::_static_mem_size = 0;

-void OtherRegionsTable::init_from_card_cache(size_t max_regions) {
-  _from_card_cache_max_regions = max_regions;
+void FromCardCache::initialize(uint n_par_rs, uint max_num_regions) {
+  guarantee(_cache == NULL, "Should not call this multiple times");

-  int n_par_rs = HeapRegionRemSet::num_par_rem_sets();
-  _from_card_cache = NEW_C_HEAP_ARRAY(int*, n_par_rs, mtGC);
-  for (int i = 0; i < n_par_rs; i++) {
-    _from_card_cache[i] = NEW_C_HEAP_ARRAY(int, max_regions, mtGC);
-    for (size_t j = 0; j < max_regions; j++) {
-      _from_card_cache[i][j] = -1;  // An invalid value.
+  _max_regions = max_num_regions;
+  _cache = Padded2DArray<int, mtGC>::create_unfreeable(n_par_rs,
+                                                       _max_regions,
+                                                       &_static_mem_size);
+
+  for (uint i = 0; i < n_par_rs; i++) {
+    for (uint j = 0; j < _max_regions; j++) {
+      set(i, j, InvalidCard);
     }
   }
-  _from_card_cache_mem_size = n_par_rs * max_regions * sizeof(int);
 }

-void OtherRegionsTable::shrink_from_card_cache(size_t new_n_regs) {
-  for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
-    assert(new_n_regs <= _from_card_cache_max_regions, "Must be within max.");
-    for (size_t j = new_n_regs; j < _from_card_cache_max_regions; j++) {
-      _from_card_cache[i][j] = -1;  // An invalid value.
+void FromCardCache::shrink(uint new_num_regions) {
+  for (uint i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
+    assert(new_num_regions <= _max_regions, "Must be within max.");
+    for (uint j = new_num_regions; j < _max_regions; j++) {
+      set(i, j, InvalidCard);
     }
   }
 }

 #ifndef PRODUCT
-void OtherRegionsTable::print_from_card_cache() {
-  for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
-    for (size_t j = 0; j < _from_card_cache_max_regions; j++) {
-      gclog_or_tty->print_cr("_from_card_cache[%d][%d] = %d.",
-                    i, j, _from_card_cache[i][j]);
+void FromCardCache::print(outputStream* out) {
+  for (uint i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
+    for (uint j = 0; j < _max_regions; j++) {
+      out->print_cr("_from_card_cache["UINT32_FORMAT"]["UINT32_FORMAT"] = "INT32_FORMAT".",
+                    i, j, at(i, j));
     }
   }
 }
 #endif

+void FromCardCache::clear(uint region_idx) {
+  uint num_par_remsets = HeapRegionRemSet::num_par_rem_sets();
+  for (uint i = 0; i < num_par_remsets; i++) {
+    set(i, region_idx, InvalidCard);
+  }
+}
+
+void OtherRegionsTable::init_from_card_cache(uint max_regions) {
+  FromCardCache::initialize(HeapRegionRemSet::num_par_rem_sets(), max_regions);
+}
+
+void OtherRegionsTable::shrink_from_card_cache(uint new_num_regions) {
+  FromCardCache::shrink(new_num_regions);
+}
+
+void OtherRegionsTable::print_from_card_cache() {
+  FromCardCache::print();
+}
+
 void OtherRegionsTable::add_reference(OopOrNarrowOopStar from, int tid) {
-  size_t cur_hrs_ind = (size_t) hr()->hrs_index();
+  uint cur_hrs_ind = hr()->hrs_index();

   if (G1TraceHeapRegionRememberedSet) {
     gclog_or_tty->print_cr("ORT::add_reference_work(" PTR_FORMAT "->" PTR_FORMAT ").",
@@ -410,19 +430,17 @@
   int from_card = (int)(uintptr_t(from) >> CardTableModRefBS::card_shift);

   if (G1TraceHeapRegionRememberedSet) {
-    gclog_or_tty->print_cr("Table for [" PTR_FORMAT "...): card %d (cache = %d)",
+    gclog_or_tty->print_cr("Table for [" PTR_FORMAT "...): card %d (cache = "INT32_FORMAT")",
                   hr()->bottom(), from_card,
-                  _from_card_cache[tid][cur_hrs_ind]);
+                  FromCardCache::at((uint)tid, cur_hrs_ind));
   }

-  if (from_card == _from_card_cache[tid][cur_hrs_ind]) {
+  if (FromCardCache::contains_or_replace((uint)tid, cur_hrs_ind, from_card)) {
     if (G1TraceHeapRegionRememberedSet) {
       gclog_or_tty->print_cr("  from-card cache hit.");
     }
     assert(contains_reference(from), "We just added it!");
     return;
-  } else {
-    _from_card_cache[tid][cur_hrs_ind] = from_card;
   }

   // Note that this may be a continued H region.
@@ -442,7 +460,7 @@
   size_t ind = from_hrs_ind & _mod_max_fine_entries_mask;
   PerRegionTable* prt = find_region_table(ind, from_hr);
   if (prt == NULL) {
-    MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
+    MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
     // Confirm that it's really not there...
     prt = find_region_table(ind, from_hr);
     if (prt == NULL) {
@@ -544,7 +562,7 @@
 jint OtherRegionsTable::_n_coarsenings = 0;

 PerRegionTable* OtherRegionsTable::delete_region_table() {
-  assert(_m.owned_by_self(), "Precondition");
+  assert(_m->owned_by_self(), "Precondition");
   assert(_n_fine_entries == _max_fine_entries, "Precondition");
   PerRegionTable* max = NULL;
   jint max_occ = 0;
@@ -676,8 +694,6 @@


 size_t OtherRegionsTable::occupied() const {
-  // Cast away const in this case.
-  MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag);
   size_t sum = occ_fine();
   sum += occ_sparse();
   sum += occ_coarse();
@@ -707,8 +723,6 @@
 }

 size_t OtherRegionsTable::mem_size() const {
-  // Cast away const in this case.
-  MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag);
   size_t sum = 0;
   // all PRTs are of the same size so it is sufficient to query only one of them.
   if (_first_all_fine_prts != NULL) {
@@ -724,7 +738,7 @@
 }

 size_t OtherRegionsTable::static_mem_size() {
-  return _from_card_cache_mem_size;
+  return FromCardCache::static_mem_size();
 }

 size_t OtherRegionsTable::fl_mem_size() {
@@ -732,14 +746,10 @@
 }

 void OtherRegionsTable::clear_fcc() {
-  size_t hrs_idx = hr()->hrs_index();
-  for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
-    _from_card_cache[i][hrs_idx] = -1;
-  }
+  FromCardCache::clear(hr()->hrs_index());
 }

 void OtherRegionsTable::clear() {
-  MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
   // if there are no entries, skip this step
   if (_first_all_fine_prts != NULL) {
     guarantee(_first_all_fine_prts != NULL && _last_all_fine_prts != NULL, "just checking");
@@ -759,7 +769,7 @@
 }

 void OtherRegionsTable::clear_incoming_entry(HeapRegion* from_hr) {
-  MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
+  MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
   size_t hrs_ind = (size_t) from_hr->hrs_index();
   size_t ind = hrs_ind & _mod_max_fine_entries_mask;
   if (del_single_region_table(ind, from_hr)) {
@@ -768,15 +778,15 @@
     _coarse_map.par_at_put(hrs_ind, 0);
   }
   // Check to see if any of the fcc entries come from here.
-  size_t hr_ind = (size_t) hr()->hrs_index();
-  for (int tid = 0; tid < HeapRegionRemSet::num_par_rem_sets(); tid++) {
-    int fcc_ent = _from_card_cache[tid][hr_ind];
-    if (fcc_ent != -1) {
+  uint hr_ind = hr()->hrs_index();
+  for (uint tid = 0; tid < HeapRegionRemSet::num_par_rem_sets(); tid++) {
+    int fcc_ent = FromCardCache::at(tid, hr_ind);
+    if (fcc_ent != FromCardCache::InvalidCard) {
       HeapWord* card_addr = (HeapWord*)
         (uintptr_t(fcc_ent) << CardTableModRefBS::card_shift);
       if (hr()->is_in_reserved(card_addr)) {
         // Clear the from card cache.
-        _from_card_cache[tid][hr_ind] = -1;
+        FromCardCache::set(tid, hr_ind, FromCardCache::InvalidCard);
       }
     }
   }
@@ -805,7 +815,7 @@

 bool OtherRegionsTable::contains_reference(OopOrNarrowOopStar from) const {
   // Cast away const in this case.
-  MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag);
+  MutexLockerEx x((Mutex*)_m, Mutex::_no_safepoint_check_flag);
   return contains_reference_locked(from);
 }

@@ -832,8 +842,6 @@
            "Must be in range.");
     return _sparse_table.contains_card(hr_ind, card_index);
   }
-
-
 }

 void
@@ -844,13 +852,15 @@
 // Determines how many threads can add records to an rset in parallel.
 // This can be done by either mutator threads together with the
 // concurrent refinement threads or GC threads.
-int HeapRegionRemSet::num_par_rem_sets() {
-  return (int)MAX2(DirtyCardQueueSet::num_par_ids() + ConcurrentG1Refine::thread_num(), ParallelGCThreads);
+uint HeapRegionRemSet::num_par_rem_sets() {
+  return (uint)MAX2(DirtyCardQueueSet::num_par_ids() + ConcurrentG1Refine::thread_num(), ParallelGCThreads);
 }

 HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa,
                                    HeapRegion* hr)
-  : _bosa(bosa), _strong_code_roots_list(NULL), _other_regions(hr) {
+  : _bosa(bosa),
+    _m(Mutex::leaf, FormatBuffer<128>("HeapRegionRemSet lock #"UINT32_FORMAT, hr->hrs_index()), true),
+    _code_roots(), _other_regions(hr, &_m) {
   reset_for_par_iteration();
 }

@@ -883,7 +893,7 @@
 }

 #ifndef PRODUCT
-void HeapRegionRemSet::print() const {
+void HeapRegionRemSet::print() {
   HeapRegionRemSetIterator iter(this);
   size_t card_index;
   while (iter.has_next(card_index)) {
@@ -909,14 +919,14 @@
 }

 void HeapRegionRemSet::clear() {
-  if (_strong_code_roots_list != NULL) {
-    delete _strong_code_roots_list;
-  }
-  _strong_code_roots_list = new (ResourceObj::C_HEAP, mtGC)
-                                GrowableArray<nmethod*>(10, 0, NULL, true);
+  MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
+  clear_locked();
+}

+void HeapRegionRemSet::clear_locked() {
+  _code_roots.clear();
   _other_regions.clear();
-  assert(occupied() == 0, "Should be clear.");
+  assert(occupied_locked() == 0, "Should be clear.");
   reset_for_par_iteration();
 }

@@ -932,27 +942,18 @@
   _other_regions.scrub(ctbs, region_bm, card_bm);
 }

-
 // Code roots support

 void HeapRegionRemSet::add_strong_code_root(nmethod* nm) {
   assert(nm != NULL, "sanity");
-  // Search for the code blob from the RHS to avoid
-  // duplicate entries as much as possible
-  if (_strong_code_roots_list->find_from_end(nm) < 0) {
-    // Code blob isn't already in the list
-    _strong_code_roots_list->push(nm);
-  }
+  _code_roots.add(nm);
 }

 void HeapRegionRemSet::remove_strong_code_root(nmethod* nm) {
   assert(nm != NULL, "sanity");
-  int idx = _strong_code_roots_list->find(nm);
-  if (idx >= 0) {
-    _strong_code_roots_list->remove_at(idx);
-  }
+  _code_roots.remove(nm);
   // Check that there were no duplicates
-  guarantee(_strong_code_roots_list->find(nm) < 0, "duplicate entry found");
+  guarantee(!_code_roots.contains(nm), "duplicate entry found");
 }

 class NMethodMigrationOopClosure : public OopClosure {
@@ -1014,8 +1015,8 @@
   GrowableArray<nmethod*> to_be_retained(10);
   G1CollectedHeap* g1h = G1CollectedHeap::heap();

-  while (_strong_code_roots_list->is_nonempty()) {
-    nmethod *nm = _strong_code_roots_list->pop();
+  while (!_code_roots.is_empty()) {
+    nmethod *nm = _code_roots.pop();
     if (nm != NULL) {
       NMethodMigrationOopClosure oop_cl(g1h, hr(), nm);
       nm->oops_do(&oop_cl);
@@ -1038,20 +1039,16 @@
 }

 void HeapRegionRemSet::strong_code_roots_do(CodeBlobClosure* blk) const {
-  for (int i = 0; i < _strong_code_roots_list->length(); i += 1) {
-    nmethod* nm = _strong_code_roots_list->at(i);
-    blk->do_code_blob(nm);
-  }
+  _code_roots.nmethods_do(blk);
 }

 size_t HeapRegionRemSet::strong_code_roots_mem_size() {
-  return sizeof(GrowableArray<nmethod*>) +
-         _strong_code_roots_list->max_length() * sizeof(nmethod*);
+  return _code_roots.mem_size();
 }

 //-------------------- Iteration --------------------

-HeapRegionRemSetIterator:: HeapRegionRemSetIterator(const HeapRegionRemSet* hrrs) :
+HeapRegionRemSetIterator:: HeapRegionRemSetIterator(HeapRegionRemSet* hrrs) :
   _hrrs(hrrs),
   _g1h(G1CollectedHeap::heap()),
   _coarse_map(&hrrs->_other_regions._coarse_map),
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONREMSET_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONREMSET_HPP

+#include "gc_implementation/g1/g1CodeCacheRemSet.hpp"
 #include "gc_implementation/g1/sparsePRT.hpp"

 // Remembered set for a heap region.  Represent a set of "cards" that
@@ -44,6 +45,54 @@
 class HRRSCleanupTask : public SparsePRTCleanupTask {
 };

+// The FromCardCache remembers the most recently processed card on the heap on
+// a per-region and per-thread basis.
+class FromCardCache : public AllStatic {
+ private:
+  // Array of card indices. Indexed by thread X and heap region to minimize
+  // thread contention.
+  static int** _cache;
+  static uint _max_regions;
+  static size_t _static_mem_size;
+
+ public:
+  enum {
+    InvalidCard = -1 // Card value of an invalid card, i.e. a card index not otherwise used.
+  };
+
+  static void clear(uint region_idx);
+
+  // Returns true if the given card is in the cache at the given location, or
+  // replaces the card at that location and returns false.
+  static bool contains_or_replace(uint worker_id, uint region_idx, int card) {
+    int card_in_cache = at(worker_id, region_idx);
+    if (card_in_cache == card) {
+      return true;
+    } else {
+      set(worker_id, region_idx, card);
+      return false;
+    }
+  }
+
+  static int at(uint worker_id, uint region_idx) {
+    return _cache[worker_id][region_idx];
+  }
+
+  static void set(uint worker_id, uint region_idx, int val) {
+    _cache[worker_id][region_idx] = val;
+  }
+
+  static void initialize(uint n_par_rs, uint max_num_regions);
+
+  static void shrink(uint new_num_regions);
+
+  static void print(outputStream* out = gclog_or_tty) PRODUCT_RETURN;
+
+  static size_t static_mem_size() {
+    return _static_mem_size;
+  }
+};
+
 // The "_coarse_map" is a bitmap with one bit for each region, where set
 // bits indicate that the corresponding region may contain some pointer
 // into the owning region.
@@ -72,7 +121,7 @@
   friend class HeapRegionRemSetIterator;

   G1CollectedHeap* _g1h;
-  Mutex            _m;
+  Mutex*           _m;
   HeapRegion*      _hr;

   // These are protected by "_m".
@@ -118,18 +167,13 @@
   // false.
   bool del_single_region_table(size_t ind, HeapRegion* hr);

-  // Indexed by thread X heap region, to minimize thread contention.
-  static int** _from_card_cache;
-  static size_t _from_card_cache_max_regions;
-  static size_t _from_card_cache_mem_size;
-
   // link/add the given fine grain remembered set into the "all" list
   void link_to_all(PerRegionTable * prt);
   // unlink/remove the given fine grain remembered set into the "all" list
   void unlink_from_all(PerRegionTable * prt);

 public:
-  OtherRegionsTable(HeapRegion* hr);
+  OtherRegionsTable(HeapRegion* hr, Mutex* m);

   HeapRegion* hr() const { return _hr; }

@@ -141,7 +185,6 @@
   // objects.
   void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);

-  // Not const because it takes a lock.
   size_t occupied() const;
   size_t occ_fine() const;
   size_t occ_coarse() const;
@@ -170,11 +213,11 @@

   // Declare the heap size (in # of regions) to the OtherRegionsTable.
   // (Uses it to initialize from_card_cache).
-  static void init_from_card_cache(size_t max_regions);
+  static void init_from_card_cache(uint max_regions);

   // Declares that only regions i s.t. 0 <= i < new_n_regs are in use.
   // Make sure any entries for higher regions are invalid.
-  static void shrink_from_card_cache(size_t new_n_regs);
+  static void shrink_from_card_cache(uint new_num_regions);

   static void print_from_card_cache();
 };
@@ -192,9 +235,11 @@
   G1BlockOffsetSharedArray* _bosa;
   G1BlockOffsetSharedArray* bosa() const { return _bosa; }

-  // A list of code blobs (nmethods) whose code contains pointers into
+  // A set of code blobs (nmethods) whose code contains pointers into
   // the region that owns this RSet.
-  GrowableArray<nmethod*>* _strong_code_roots_list;
+  G1CodeRootSet _code_roots;
+
+  Mutex _m;

   OtherRegionsTable _other_regions;

@@ -218,17 +263,20 @@
   static void print_event(outputStream* str, Event evnt);

 public:
-  HeapRegionRemSet(G1BlockOffsetSharedArray* bosa,
-                   HeapRegion* hr);
+  HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, HeapRegion* hr);

-  static int num_par_rem_sets();
+  static uint num_par_rem_sets();
   static void setup_remset_size();

   HeapRegion* hr() const {
     return _other_regions.hr();
   }

-  size_t occupied() const {
+  size_t occupied() {
+    MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
+    return occupied_locked();
+  }
+  size_t occupied_locked() {
     return _other_regions.occupied();
   }
   size_t occ_fine() const {
@@ -260,6 +308,7 @@
   // The region is being reclaimed; clear its remset, and any mention of
   // entries for this region in other remsets.
   void clear();
+  void clear_locked();

   // Attempt to claim the region.  Returns true iff this call caused an
   // atomic transition from Unclaimed to Claimed.
@@ -289,6 +338,7 @@
   // The actual # of bytes this hr_remset takes up.
   // Note also includes the strong code root set.
   size_t mem_size() {
+    MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
     return _other_regions.mem_size()
       // This correction is necessary because the above includes the second
       // part.
@@ -299,13 +349,13 @@
   // Returns the memory occupancy of all static data structures associated
   // with remembered sets.
   static size_t static_mem_size() {
-    return OtherRegionsTable::static_mem_size();
+    return OtherRegionsTable::static_mem_size() + G1CodeRootSet::static_mem_size();
   }

   // Returns the memory occupancy of all free_list data structures associated
   // with remembered sets.
   static size_t fl_mem_size() {
-    return OtherRegionsTable::fl_mem_size();
+    return OtherRegionsTable::fl_mem_size() + G1CodeRootSet::fl_mem_size();
   }

   bool contains_reference(OopOrNarrowOopStar from) const {
@@ -328,21 +378,21 @@
   void strong_code_roots_do(CodeBlobClosure* blk) const;

   // Returns the number of elements in the strong code roots list
-  int strong_code_roots_list_length() {
-    return _strong_code_roots_list->length();
+  size_t strong_code_roots_list_length() {
+    return _code_roots.length();
   }

   // Returns true if the strong code roots contains the given
   // nmethod.
   bool strong_code_roots_list_contains(nmethod* nm) {
-    return _strong_code_roots_list->contains(nm);
+    return _code_roots.contains(nm);
   }

   // Returns the amount of memory, in bytes, currently
   // consumed by the strong code roots.
   size_t strong_code_roots_mem_size();

-  void print() const;
+  void print() PRODUCT_RETURN;

   // Called during a stop-world phase to perform any deferred cleanups.
   static void cleanup();
@@ -350,12 +400,13 @@
   // Declare the heap size (in # of regions) to the HeapRegionRemSet(s).
   // (Uses it to initialize from_card_cache).
   static void init_heap(uint max_regions) {
-    OtherRegionsTable::init_from_card_cache((size_t) max_regions);
+    G1CodeRootSet::initialize();
+    OtherRegionsTable::init_from_card_cache(max_regions);
   }

   // Declares that only regions i s.t. 0 <= i < new_n_regs are in use.
   static void shrink_heap(uint new_n_regs) {
-    OtherRegionsTable::shrink_from_card_cache((size_t) new_n_regs);
+    OtherRegionsTable::shrink_from_card_cache(new_n_regs);
   }

 #ifndef PRODUCT
@@ -384,7 +435,7 @@
 class HeapRegionRemSetIterator : public StackObj {

   // The region RSet over which we're iterating.
-  const HeapRegionRemSet* _hrrs;
+  HeapRegionRemSet* _hrrs;

   // Local caching of HRRS fields.
   const BitMap*             _coarse_map;
@@ -441,7 +492,7 @@
 public:
   // We require an iterator to be initialized before use, so the
   // constructor does little.
-  HeapRegionRemSetIterator(const HeapRegionRemSet* hrrs);
+  HeapRegionRemSetIterator(HeapRegionRemSet* hrrs);

   // If there remains one or more cards to be yielded, returns true and
   // sets "card_index" to one of those cards (which is then considered
--- a/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -25,7 +25,7 @@
 #include "precompiled.hpp"
 #include "gc_implementation/g1/heapRegion.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
-#include "gc_implementation/g1/heapRegionSets.hpp"
+#include "gc_implementation/g1/heapRegionSet.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "memory/allocation.hpp"
--- a/src/share/vm/gc_implementation/g1/heapRegionSet.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionSet.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,171 +23,60 @@
  */

 #include "precompiled.hpp"
+#include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSet.inline.hpp"

-uint HeapRegionSetBase::_unrealistically_long_length = 0;
-HRSPhase HeapRegionSetBase::_phase = HRSPhaseNone;
-
-//////////////////// HeapRegionSetBase ////////////////////
-
-void HeapRegionSetBase::set_unrealistically_long_length(uint len) {
-  guarantee(_unrealistically_long_length == 0, "should only be set once");
-  _unrealistically_long_length = len;
-}
+uint FreeRegionList::_unrealistically_long_length = 0;

 void HeapRegionSetBase::fill_in_ext_msg(hrs_ext_msg* msg, const char* message) {
-  msg->append("[%s] %s ln: %u rn: %u cy: "SIZE_FORMAT" ud: "SIZE_FORMAT,
-              name(), message, length(), region_num(),
-              total_capacity_bytes(), total_used_bytes());
+  msg->append("[%s] %s ln: %u cy: "SIZE_FORMAT,
+              name(), message, length(), total_capacity_bytes());
   fill_in_ext_msg_extra(msg);
 }

-bool HeapRegionSetBase::verify_region(HeapRegion* hr,
-                                  HeapRegionSetBase* expected_containing_set) {
-  const char* error_message = NULL;
-
-  if (!regions_humongous()) {
-    if (hr->isHumongous()) {
-      error_message = "the region should not be humongous";
-    }
-  } else {
-    if (!hr->isHumongous() || !hr->startsHumongous()) {
-      error_message = "the region should be 'starts humongous'";
-    }
-  }
-
-  if (!regions_empty()) {
-    if (hr->is_empty()) {
-      error_message = "the region should not be empty";
-    }
-  } else {
-    if (!hr->is_empty()) {
-      error_message = "the region should be empty";
-    }
-  }
-
-#ifdef ASSERT
-  // The _containing_set field is only available when ASSERT is defined.
-  if (hr->containing_set() != expected_containing_set) {
-    error_message = "inconsistent containing set found";
-  }
-#endif // ASSERT
-
-  const char* extra_error_message = verify_region_extra(hr);
-  if (extra_error_message != NULL) {
-    error_message = extra_error_message;
-  }
-
-  if (error_message != NULL) {
-    outputStream* out = tty;
-    out->cr();
-    out->print_cr("## [%s] %s", name(), error_message);
-    out->print_cr("## Offending Region: "PTR_FORMAT, hr);
-    out->print_cr("   "HR_FORMAT, HR_FORMAT_PARAMS(hr));
-#ifdef ASSERT
-    out->print_cr("   containing set: "PTR_FORMAT, hr->containing_set());
-#endif // ASSERT
-    out->print_cr("## Offending Region Set: "PTR_FORMAT, this);
-    print_on(out);
-    return false;
-  } else {
-    return true;
-  }
+#ifndef PRODUCT
+void HeapRegionSetBase::verify_region(HeapRegion* hr) {
+  assert(hr->containing_set() == this, err_msg("Inconsistent containing set for %u", hr->hrs_index()));
+  assert(!hr->is_young(), err_msg("Adding young region %u", hr->hrs_index())); // currently we don't use these sets for young regions
+  assert(hr->isHumongous() == regions_humongous(), err_msg("Wrong humongous state for region %u and set %s", hr->hrs_index(), name()));
+  assert(hr->is_empty() == regions_empty(), err_msg("Wrong empty state for region %u and set %s", hr->hrs_index(), name()));
+  assert(hr->rem_set()->verify_ready_for_par_iteration(), err_msg("Wrong iteration state %u", hr->hrs_index()));
 }
+#endif

 void HeapRegionSetBase::verify() {
   // It's important that we also observe the MT safety protocol even
   // for the verification calls. If we do verification without the
   // appropriate locks and the set changes underneath our feet
   // verification might fail and send us on a wild goose chase.
-  hrs_assert_mt_safety_ok(this);
-
-  guarantee(( is_empty() && length() == 0 && region_num() == 0 &&
-              total_used_bytes() == 0 && total_capacity_bytes() == 0) ||
-            (!is_empty() && length() >= 0 && region_num() >= 0 &&
-              total_used_bytes() >= 0 && total_capacity_bytes() >= 0),
-            hrs_ext_msg(this, "invariant"));
+  check_mt_safety();

-  guarantee((!regions_humongous() && region_num() == length()) ||
-            ( regions_humongous() && region_num() >= length()),
-            hrs_ext_msg(this, "invariant"));
-
-  guarantee(!regions_empty() || total_used_bytes() == 0,
-            hrs_ext_msg(this, "invariant"));
-
-  guarantee(total_used_bytes() <= total_capacity_bytes(),
+  guarantee(( is_empty() && length() == 0 && total_capacity_bytes() == 0) ||
+            (!is_empty() && length() >= 0 && total_capacity_bytes() >= 0),
             hrs_ext_msg(this, "invariant"));
 }

 void HeapRegionSetBase::verify_start() {
   // See comment in verify() about MT safety and verification.
-  hrs_assert_mt_safety_ok(this);
+  check_mt_safety();
   assert(!_verify_in_progress,
          hrs_ext_msg(this, "verification should not be in progress"));

   // Do the basic verification first before we do the checks over the regions.
   HeapRegionSetBase::verify();

-  _calc_length               = 0;
-  _calc_region_num           = 0;
-  _calc_total_capacity_bytes = 0;
-  _calc_total_used_bytes     = 0;
   _verify_in_progress        = true;
 }

-void HeapRegionSetBase::verify_next_region(HeapRegion* hr) {
-  // See comment in verify() about MT safety and verification.
-  hrs_assert_mt_safety_ok(this);
-  assert(_verify_in_progress,
-         hrs_ext_msg(this, "verification should be in progress"));
-
-  guarantee(verify_region(hr, this), hrs_ext_msg(this, "region verification"));
-
-  _calc_length               += 1;
-  _calc_region_num           += hr->region_num();
-  _calc_total_capacity_bytes += hr->capacity();
-  _calc_total_used_bytes     += hr->used();
-}
-
 void HeapRegionSetBase::verify_end() {
   // See comment in verify() about MT safety and verification.
-  hrs_assert_mt_safety_ok(this);
+  check_mt_safety();
   assert(_verify_in_progress,
          hrs_ext_msg(this, "verification should be in progress"));

-  guarantee(length() == _calc_length,
-            hrs_err_msg("[%s] length: %u should be == calc length: %u",
-                        name(), length(), _calc_length));
-
-  guarantee(region_num() == _calc_region_num,
-            hrs_err_msg("[%s] region num: %u should be == calc region num: %u",
-                        name(), region_num(), _calc_region_num));
-
-  guarantee(total_capacity_bytes() == _calc_total_capacity_bytes,
-            hrs_err_msg("[%s] capacity bytes: "SIZE_FORMAT" should be == "
-                        "calc capacity bytes: "SIZE_FORMAT,
-                        name(),
-                        total_capacity_bytes(), _calc_total_capacity_bytes));
-
-  guarantee(total_used_bytes() == _calc_total_used_bytes,
-            hrs_err_msg("[%s] used bytes: "SIZE_FORMAT" should be == "
-                        "calc used bytes: "SIZE_FORMAT,
-                        name(), total_used_bytes(), _calc_total_used_bytes));
-
   _verify_in_progress = false;
 }

-void HeapRegionSetBase::clear_phase() {
-  assert(_phase != HRSPhaseNone, "pre-condition");
-  _phase = HRSPhaseNone;
-}
-
-void HeapRegionSetBase::set_phase(HRSPhase phase) {
-  assert(_phase == HRSPhaseNone, "pre-condition");
-  assert(phase != HRSPhaseNone, "pre-condition");
-  _phase = phase;
-}
-
 void HeapRegionSetBase::print_on(outputStream* out, bool print_contents) {
   out->cr();
   out->print_cr("Set: %s ("PTR_FORMAT")", name(), this);
@@ -196,76 +85,38 @@
   out->print_cr("    empty             : %s", BOOL_TO_STR(regions_empty()));
   out->print_cr("  Attributes");
   out->print_cr("    length            : %14u", length());
-  out->print_cr("    region num        : %14u", region_num());
   out->print_cr("    total capacity    : "SIZE_FORMAT_W(14)" bytes",
                 total_capacity_bytes());
-  out->print_cr("    total used        : "SIZE_FORMAT_W(14)" bytes",
-                total_used_bytes());
-}
-
-void HeapRegionSetBase::clear() {
-  _length           = 0;
-  _region_num       = 0;
-  _total_used_bytes = 0;
 }

-HeapRegionSetBase::HeapRegionSetBase(const char* name)
+HeapRegionSetBase::HeapRegionSetBase(const char* name, bool humongous, bool empty, HRSMtSafeChecker* mt_safety_checker)
   : _name(name), _verify_in_progress(false),
-    _calc_length(0), _calc_region_num(0),
-    _calc_total_capacity_bytes(0), _calc_total_used_bytes(0) { }
-
-//////////////////// HeapRegionSet ////////////////////
-
-void HeapRegionSet::update_from_proxy(HeapRegionSet* proxy_set) {
-  hrs_assert_mt_safety_ok(this);
-  hrs_assert_mt_safety_ok(proxy_set);
-  hrs_assert_sets_match(this, proxy_set);
-
-  verify_optional();
-  proxy_set->verify_optional();
-
-  if (proxy_set->is_empty()) return;
+    _is_humongous(humongous), _is_empty(empty), _mt_safety_checker(mt_safety_checker),
+    _count()
+{ }

-  assert(proxy_set->length() <= _length,
-         hrs_err_msg("[%s] proxy set length: %u should be <= length: %u",
-                     name(), proxy_set->length(), _length));
-  _length -= proxy_set->length();
-
-  assert(proxy_set->region_num() <= _region_num,
-         hrs_err_msg("[%s] proxy set region num: %u should be <= region num: %u",
-                     name(), proxy_set->region_num(), _region_num));
-  _region_num -= proxy_set->region_num();
-
-  assert(proxy_set->total_used_bytes() <= _total_used_bytes,
-         hrs_err_msg("[%s] proxy set used bytes: "SIZE_FORMAT" "
-                     "should be <= used bytes: "SIZE_FORMAT,
-                     name(), proxy_set->total_used_bytes(),
-                     _total_used_bytes));
-  _total_used_bytes -= proxy_set->total_used_bytes();
-
-  proxy_set->clear();
-
-  verify_optional();
-  proxy_set->verify_optional();
+void FreeRegionList::set_unrealistically_long_length(uint len) {
+  guarantee(_unrealistically_long_length == 0, "should only be set once");
+  _unrealistically_long_length = len;
 }

-//////////////////// HeapRegionLinkedList ////////////////////
-
-void HeapRegionLinkedList::fill_in_ext_msg_extra(hrs_ext_msg* msg) {
+void FreeRegionList::fill_in_ext_msg_extra(hrs_ext_msg* msg) {
   msg->append(" hd: "PTR_FORMAT" tl: "PTR_FORMAT, head(), tail());
 }

-void HeapRegionLinkedList::add_as_head(HeapRegionLinkedList* from_list) {
-  hrs_assert_mt_safety_ok(this);
-  hrs_assert_mt_safety_ok(from_list);
+void FreeRegionList::add_as_head_or_tail(FreeRegionList* from_list, bool as_head) {
+  check_mt_safety();
+  from_list->check_mt_safety();

   verify_optional();
   from_list->verify_optional();

-  if (from_list->is_empty()) return;
+  if (from_list->is_empty()) {
+    return;
+  }

 #ifdef ASSERT
-  HeapRegionLinkedListIterator iter(from_list);
+  FreeRegionListIterator iter(from_list);
   while (iter.more_available()) {
     HeapRegion* hr = iter.get_next();
     // In set_containing_set() we check that we either set the value
@@ -276,35 +127,75 @@
   }
 #endif // ASSERT

-  if (_head != NULL) {
-    assert(length() >  0 && _tail != NULL, hrs_ext_msg(this, "invariant"));
-    from_list->_tail->set_next(_head);
-  } else {
+  if (_head == NULL) {
     assert(length() == 0 && _tail == NULL, hrs_ext_msg(this, "invariant"));
+    _head = from_list->_head;
     _tail = from_list->_tail;
+  } else {
+    assert(length() > 0 && _tail != NULL, hrs_ext_msg(this, "invariant"));
+    if (as_head) {
+      from_list->_tail->set_next(_head);
+      _head->set_prev(from_list->_tail);
+      _head = from_list->_head;
+    } else {
+      _tail->set_next(from_list->_head);
+      from_list->_head->set_prev(_tail);
+      _tail = from_list->_tail;
+    }
   }
-  _head = from_list->_head;

-  _length           += from_list->length();
-  _region_num       += from_list->region_num();
-  _total_used_bytes += from_list->total_used_bytes();
+  _count.increment(from_list->length(), from_list->total_capacity_bytes());
   from_list->clear();

   verify_optional();
   from_list->verify_optional();
 }

-void HeapRegionLinkedList::add_as_tail(HeapRegionLinkedList* from_list) {
-  hrs_assert_mt_safety_ok(this);
-  hrs_assert_mt_safety_ok(from_list);
+void FreeRegionList::add_as_head(FreeRegionList* from_list) {
+  add_as_head_or_tail(from_list, true /* as_head */);
+}
+
+void FreeRegionList::add_as_tail(FreeRegionList* from_list) {
+  add_as_head_or_tail(from_list, false /* as_head */);
+}
+
+void FreeRegionList::remove_all() {
+  check_mt_safety();
+  verify_optional();
+
+  HeapRegion* curr = _head;
+  while (curr != NULL) {
+    verify_region(curr);
+
+    HeapRegion* next = curr->next();
+    curr->set_next(NULL);
+    curr->set_prev(NULL);
+    curr->set_containing_set(NULL);
+    curr = next;
+  }
+  clear();
+
+  verify_optional();
+}
+
+void FreeRegionList::add_ordered(FreeRegionList* from_list) {
+  check_mt_safety();
+  from_list->check_mt_safety();

   verify_optional();
   from_list->verify_optional();

-  if (from_list->is_empty()) return;
+  if (from_list->is_empty()) {
+    return;
+  }

-#ifdef ASSERT
-  HeapRegionLinkedListIterator iter(from_list);
+  if (is_empty()) {
+    add_as_head(from_list);
+    return;
+  }
+
+  #ifdef ASSERT
+  FreeRegionListIterator iter(from_list);
   while (iter.more_available()) {
     HeapRegion* hr = iter.get_next();
     // In set_containing_set() we check that we either set the value
@@ -313,46 +204,50 @@
     hr->set_containing_set(NULL);
     hr->set_containing_set(this);
   }
-#endif // ASSERT
+  #endif // ASSERT
+
+  HeapRegion* curr_to = _head;
+  HeapRegion* curr_from = from_list->_head;
+
+  while (curr_from != NULL) {
+    while (curr_to != NULL && curr_to->hrs_index() < curr_from->hrs_index()) {
+      curr_to = curr_to->next();
+    }
+
+    if (curr_to == NULL) {
+      // The rest of the from list should be added as tail
+      _tail->set_next(curr_from);
+      curr_from->set_prev(_tail);
+      curr_from = NULL;
+    } else {
+      HeapRegion* next_from = curr_from->next();

-  if (_tail != NULL) {
-    assert(length() >  0 && _head != NULL, hrs_ext_msg(this, "invariant"));
-    _tail->set_next(from_list->_head);
-  } else {
-    assert(length() == 0 && _head == NULL, hrs_ext_msg(this, "invariant"));
-    _head = from_list->_head;
+      curr_from->set_next(curr_to);
+      curr_from->set_prev(curr_to->prev());
+      if (curr_to->prev() == NULL) {
+        _head = curr_from;
+      } else {
+        curr_to->prev()->set_next(curr_from);
+      }
+      curr_to->set_prev(curr_from);
+
+      curr_from = next_from;
+    }
   }
-  _tail = from_list->_tail;

-  _length           += from_list->length();
-  _region_num       += from_list->region_num();
-  _total_used_bytes += from_list->total_used_bytes();
+  if (_tail->hrs_index() < from_list->_tail->hrs_index()) {
+    _tail = from_list->_tail;
+  }
+
+  _count.increment(from_list->length(), from_list->total_capacity_bytes());
   from_list->clear();

   verify_optional();
   from_list->verify_optional();
 }

-void HeapRegionLinkedList::remove_all() {
-  hrs_assert_mt_safety_ok(this);
-  verify_optional();
-
-  HeapRegion* curr = _head;
-  while (curr != NULL) {
-    hrs_assert_region_ok(this, curr, this);
-
-    HeapRegion* next = curr->next();
-    curr->set_next(NULL);
-    curr->set_containing_set(NULL);
-    curr = next;
-  }
-  clear();
-
-  verify_optional();
-}
-
-void HeapRegionLinkedList::remove_all_pending(uint target_count) {
-  hrs_assert_mt_safety_ok(this);
+void FreeRegionList::remove_all_pending(uint target_count) {
+  check_mt_safety();
   assert(target_count > 1, hrs_ext_msg(this, "pre-condition"));
   assert(!is_empty(), hrs_ext_msg(this, "pre-condition"));

@@ -360,11 +255,11 @@
   DEBUG_ONLY(uint old_length = length();)

   HeapRegion* curr = _head;
-  HeapRegion* prev = NULL;
   uint count = 0;
   while (curr != NULL) {
-    hrs_assert_region_ok(this, curr, this);
+    verify_region(curr);
     HeapRegion* next = curr->next();
+    HeapRegion* prev = curr->prev();

     if (curr->pending_removal()) {
       assert(count < target_count,
@@ -384,10 +279,15 @@
         _tail = prev;
       } else {
         assert(_tail != curr, hrs_ext_msg(this, "invariant"));
+        next->set_prev(prev);
+      }
+      if (_last = curr) {
+        _last = NULL;
       }

       curr->set_next(NULL);
-      remove_internal(curr);
+      curr->set_prev(NULL);
+      remove(curr);
       curr->set_pending_removal(false);

       count += 1;
@@ -397,8 +297,6 @@
       // carry on iterating to make sure there are not more regions
       // tagged with pending removal.
       DEBUG_ONLY(if (count == target_count) break;)
-    } else {
-      prev = curr;
     }
     curr = next;
   }
@@ -414,46 +312,27 @@
   verify_optional();
 }

-void HeapRegionLinkedList::verify() {
+void FreeRegionList::verify() {
   // See comment in HeapRegionSetBase::verify() about MT safety and
   // verification.
-  hrs_assert_mt_safety_ok(this);
+  check_mt_safety();

   // This will also do the basic verification too.
   verify_start();

-  HeapRegion* curr  = _head;
-  HeapRegion* prev1 = NULL;
-  HeapRegion* prev0 = NULL;
-  uint        count = 0;
-  while (curr != NULL) {
-    verify_next_region(curr);
-
-    count += 1;
-    guarantee(count < _unrealistically_long_length,
-              hrs_err_msg("[%s] the calculated length: %u "
-                          "seems very long, is there maybe a cycle? "
-                          "curr: "PTR_FORMAT" prev0: "PTR_FORMAT" "
-                          "prev1: "PTR_FORMAT" length: %u",
-                          name(), count, curr, prev0, prev1, length()));
-
-    prev1 = prev0;
-    prev0 = curr;
-    curr  = curr->next();
-  }
-
-  guarantee(_tail == prev0, hrs_ext_msg(this, "post-condition"));
+  verify_list();

   verify_end();
 }

-void HeapRegionLinkedList::clear() {
-  HeapRegionSetBase::clear();
+void FreeRegionList::clear() {
+  _count = HeapRegionSetCount();
   _head = NULL;
   _tail = NULL;
+  _last = NULL;
 }

-void HeapRegionLinkedList::print_on(outputStream* out, bool print_contents) {
+void FreeRegionList::print_on(outputStream* out, bool print_contents) {
   HeapRegionSetBase::print_on(out, print_contents);
   out->print_cr("  Linking");
   out->print_cr("    head              : "PTR_FORMAT, _head);
@@ -461,10 +340,124 @@

   if (print_contents) {
     out->print_cr("  Contents");
-    HeapRegionLinkedListIterator iter(this);
+    FreeRegionListIterator iter(this);
     while (iter.more_available()) {
       HeapRegion* hr = iter.get_next();
       hr->print_on(out);
     }
   }
 }
+
+void FreeRegionList::verify_list() {
+  HeapRegion* curr = head();
+  HeapRegion* prev1 = NULL;
+  HeapRegion* prev0 = NULL;
+  uint count = 0;
+  size_t capacity = 0;
+  uint last_index = 0;
+
+  guarantee(_head == NULL || _head->prev() == NULL, "_head should not have a prev");
+  while (curr != NULL) {
+    verify_region(curr);
+
+    count++;
+    guarantee(count < _unrealistically_long_length,
+        hrs_err_msg("[%s] the calculated length: %u seems very long, is there maybe a cycle? curr: "PTR_FORMAT" prev0: "PTR_FORMAT" " "prev1: "PTR_FORMAT" length: %u", name(), count, curr, prev0, prev1, length()));
+
+    if (curr->next() != NULL) {
+      guarantee(curr->next()->prev() == curr, "Next or prev pointers messed up");
+    }
+    guarantee(curr->hrs_index() == 0 || curr->hrs_index() > last_index, "List should be sorted");
+    last_index = curr->hrs_index();
+
+    capacity += curr->capacity();
+
+    prev1 = prev0;
+    prev0 = curr;
+    curr = curr->next();
+  }
+
+  guarantee(tail() == prev0, err_msg("Expected %s to end with %u but it ended with %u.", name(), tail()->hrs_index(), prev0->hrs_index()));
+  guarantee(_tail == NULL || _tail->next() == NULL, "_tail should not have a next");
+  guarantee(length() == count, err_msg("%s count mismatch. Expected %u, actual %u.", name(), length(), count));
+  guarantee(total_capacity_bytes() == capacity, err_msg("%s capacity mismatch. Expected " SIZE_FORMAT ", actual " SIZE_FORMAT,
+      name(), total_capacity_bytes(), capacity));
+}
+
+// Note on the check_mt_safety() methods below:
+//
+// Verification of the "master" heap region sets / lists that are
+// maintained by G1CollectedHeap is always done during a STW pause and
+// by the VM thread at the start / end of the pause. The standard
+// verification methods all assert check_mt_safety(). This is
+// important as it ensures that verification is done without
+// concurrent updates taking place at the same time. It follows, that,
+// for the "master" heap region sets / lists, the check_mt_safety()
+// method should include the VM thread / STW case.
+
+void MasterFreeRegionListMtSafeChecker::check() {
+  // Master Free List MT safety protocol:
+  // (a) If we're at a safepoint, operations on the master free list
+  // should be invoked by either the VM thread (which will serialize
+  // them) or by the GC workers while holding the
+  // FreeList_lock.
+  // (b) If we're not at a safepoint, operations on the master free
+  // list should be invoked while holding the Heap_lock.
+
+  if (SafepointSynchronize::is_at_safepoint()) {
+    guarantee(Thread::current()->is_VM_thread() ||
+              FreeList_lock->owned_by_self(), "master free list MT safety protocol at a safepoint");
+  } else {
+    guarantee(Heap_lock->owned_by_self(), "master free list MT safety protocol outside a safepoint");
+  }
+}
+
+void SecondaryFreeRegionListMtSafeChecker::check() {
+  // Secondary Free List MT safety protocol:
+  // Operations on the secondary free list should always be invoked
+  // while holding the SecondaryFreeList_lock.
+
+  guarantee(SecondaryFreeList_lock->owned_by_self(), "secondary free list MT safety protocol");
+}
+
+void OldRegionSetMtSafeChecker::check() {
+  // Master Old Set MT safety protocol:
+  // (a) If we're at a safepoint, operations on the master old set
+  // should be invoked:
+  // - by the VM thread (which will serialize them), or
+  // - by the GC workers while holding the FreeList_lock, if we're
+  //   at a safepoint for an evacuation pause (this lock is taken
+  //   anyway when an GC alloc region is retired so that a new one
+  //   is allocated from the free list), or
+  // - by the GC workers while holding the OldSets_lock, if we're at a
+  //   safepoint for a cleanup pause.
+  // (b) If we're not at a safepoint, operations on the master old set
+  // should be invoked while holding the Heap_lock.
+
+  if (SafepointSynchronize::is_at_safepoint()) {
+    guarantee(Thread::current()->is_VM_thread()
+        || FreeList_lock->owned_by_self() || OldSets_lock->owned_by_self(),
+        "master old set MT safety protocol at a safepoint");
+  } else {
+    guarantee(Heap_lock->owned_by_self(), "master old set MT safety protocol outside a safepoint");
+  }
+}
+
+void HumongousRegionSetMtSafeChecker::check() {
+  // Humongous Set MT safety protocol:
+  // (a) If we're at a safepoint, operations on the master humongous
+  // set should be invoked by either the VM thread (which will
+  // serialize them) or by the GC workers while holding the
+  // OldSets_lock.
+  // (b) If we're not at a safepoint, operations on the master
+  // humongous set should be invoked while holding the Heap_lock.
+
+  if (SafepointSynchronize::is_at_safepoint()) {
+    guarantee(Thread::current()->is_VM_thread() ||
+              OldSets_lock->owned_by_self(),
+              "master humongous set MT safety protocol at a safepoint");
+  } else {
+    guarantee(Heap_lock->owned_by_self(),
+              "master humongous set MT safety protocol outside a safepoint");
+  }
+}
--- a/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -38,135 +38,108 @@
 #define HEAP_REGION_SET_FORCE_VERIFY defined(ASSERT)
 #endif // HEAP_REGION_SET_FORCE_VERIFY

-//////////////////// HeapRegionSetBase ////////////////////
+class hrs_ext_msg;
+
+class HRSMtSafeChecker : public CHeapObj<mtGC> {
+public:
+  virtual void check() = 0;
+};
+
+class MasterFreeRegionListMtSafeChecker    : public HRSMtSafeChecker { public: void check(); };
+class SecondaryFreeRegionListMtSafeChecker : public HRSMtSafeChecker { public: void check(); };
+class HumongousRegionSetMtSafeChecker      : public HRSMtSafeChecker { public: void check(); };
+class OldRegionSetMtSafeChecker            : public HRSMtSafeChecker { public: void check(); };
+
+class HeapRegionSetCount VALUE_OBJ_CLASS_SPEC {
+  friend class VMStructs;
+  uint   _length;
+  size_t _capacity;
+
+public:
+  HeapRegionSetCount() : _length(0), _capacity(0) { }
+
+  const uint   length()   const { return _length;   }
+  const size_t capacity() const { return _capacity; }
+
+  void increment(uint length_to_add, size_t capacity_to_add) {
+    _length += length_to_add;
+    _capacity += capacity_to_add;
+  }
+
+  void decrement(const uint length_to_remove, const size_t capacity_to_remove) {
+    _length -= length_to_remove;
+    _capacity -= capacity_to_remove;
+  }
+};

 // Base class for all the classes that represent heap region sets. It
 // contains the basic attributes that each set needs to maintain
 // (e.g., length, region num, used bytes sum) plus any shared
 // functionality (e.g., verification).

-class hrs_ext_msg;
-
-typedef enum {
-  HRSPhaseNone,
-  HRSPhaseEvacuation,
-  HRSPhaseCleanup,
-  HRSPhaseFullGC
-} HRSPhase;
-
-class HRSPhaseSetter;
-
 class HeapRegionSetBase VALUE_OBJ_CLASS_SPEC {
-  friend class hrs_ext_msg;
-  friend class HRSPhaseSetter;
   friend class VMStructs;
+private:
+  bool _is_humongous;
+  bool _is_empty;
+  HRSMtSafeChecker* _mt_safety_checker;

 protected:
-  static uint _unrealistically_long_length;
-
   // The number of regions added to the set. If the set contains
   // only humongous regions, this reflects only 'starts humongous'
   // regions and does not include 'continues humongous' ones.
-  uint _length;
-
-  // The total number of regions represented by the set. If the set
-  // does not contain humongous regions, this should be the same as
-  // _length. If the set contains only humongous regions, this will
-  // include the 'continues humongous' regions.
-  uint _region_num;
-
-  // We don't keep track of the total capacity explicitly, we instead
-  // recalculate it based on _region_num and the heap region size.
-
-  // The sum of used bytes in the all the regions in the set.
-  size_t _total_used_bytes;
+  HeapRegionSetCount _count;

   const char* _name;

-  bool        _verify_in_progress;
-  uint        _calc_length;
-  uint        _calc_region_num;
-  size_t      _calc_total_capacity_bytes;
-  size_t      _calc_total_used_bytes;
-
-  // This is here so that it can be used in the subclasses to assert
-  // something different depending on which phase the GC is in. This
-  // can be particularly helpful in the check_mt_safety() methods.
-  static HRSPhase _phase;
-
-  // Only used by HRSPhaseSetter.
-  static void clear_phase();
-  static void set_phase(HRSPhase phase);
+  bool _verify_in_progress;

   // verify_region() is used to ensure that the contents of a region
-  // added to / removed from a set are consistent. Different sets
-  // make different assumptions about the regions added to them. So
-  // each set can override verify_region_extra(), which is called
-  // from verify_region(), and do any extra verification it needs to
-  // perform in that.
-  virtual const char* verify_region_extra(HeapRegion* hr) { return NULL; }
-  bool verify_region(HeapRegion* hr,
-                     HeapRegionSetBase* expected_containing_set);
+  // added to / removed from a set are consistent.
+  void verify_region(HeapRegion* hr) PRODUCT_RETURN;

   // Indicates whether all regions in the set should be humongous or
   // not. Only used during verification.
-  virtual bool regions_humongous() = 0;
+  bool regions_humongous() { return _is_humongous; }

   // Indicates whether all regions in the set should be empty or
   // not. Only used during verification.
-  virtual bool regions_empty() = 0;
+  bool regions_empty() { return _is_empty; }
+
+  void check_mt_safety() {
+    if (_mt_safety_checker != NULL) {
+      _mt_safety_checker->check();
+    }
+  }
+
+  virtual void fill_in_ext_msg_extra(hrs_ext_msg* msg) { }
+
+  HeapRegionSetBase(const char* name, bool humongous, bool empty, HRSMtSafeChecker* mt_safety_checker);
+
+public:
+  const char* name() { return _name; }

-  // Subclasses can optionally override this to do MT safety protocol
-  // checks. It is called in an assert from all methods that perform
-  // updates on the set (and subclasses should also call it too).
-  virtual bool check_mt_safety() { return true; }
+  uint length() { return _count.length(); }
+
+  bool is_empty() { return _count.length() == 0; }
+
+  size_t total_capacity_bytes() {
+    return _count.capacity();
+  }
+
+  // It updates the fields of the set to reflect hr being added to
+  // the set and tags the region appropriately.
+  inline void add(HeapRegion* hr);
+
+  // It updates the fields of the set to reflect hr being removed
+  // from the set and tags the region appropriately.
+  inline void remove(HeapRegion* hr);

   // fill_in_ext_msg() writes the the values of the set's attributes
   // in the custom err_msg (hrs_ext_msg). fill_in_ext_msg_extra()
   // allows subclasses to append further information.
-  virtual void fill_in_ext_msg_extra(hrs_ext_msg* msg) { }
   void fill_in_ext_msg(hrs_ext_msg* msg, const char* message);

-  // It updates the fields of the set to reflect hr being added to
-  // the set.
-  inline void update_for_addition(HeapRegion* hr);
-
-  // It updates the fields of the set to reflect hr being added to
-  // the set and tags the region appropriately.
-  inline void add_internal(HeapRegion* hr);
-
-  // It updates the fields of the set to reflect hr being removed
-  // from the set.
-  inline void update_for_removal(HeapRegion* hr);
-
-  // It updates the fields of the set to reflect hr being removed
-  // from the set and tags the region appropriately.
-  inline void remove_internal(HeapRegion* hr);
-
-  // It clears all the fields of the sets. Note: it will not iterate
-  // over the set and remove regions from it. It assumes that the
-  // caller has already done so. It will literally just clear the fields.
-  virtual void clear();
-
-  HeapRegionSetBase(const char* name);
-
-public:
-  static void set_unrealistically_long_length(uint len);
-
-  const char* name() { return _name; }
-
-  uint length() { return _length; }
-
-  bool is_empty() { return _length == 0; }
-
-  uint region_num() { return _region_num; }
-
-  size_t total_capacity_bytes() {
-    return (size_t) region_num() << HeapRegion::LogOfHRGrainBytes;
-  }
-
-  size_t total_used_bytes() { return _total_used_bytes; }
-
   virtual void verify();
   void verify_start();
   void verify_next_region(HeapRegion* hr);
@@ -187,7 +160,6 @@
 // assert/guarantee-specific message it also prints out the values of
 // the fields of the associated set. This can be very helpful in
 // diagnosing failures.
-
 class hrs_ext_msg : public hrs_err_msg {
 public:
   hrs_ext_msg(HeapRegionSetBase* set, const char* message) : hrs_err_msg("") {
@@ -195,32 +167,6 @@
   }
 };

-class HRSPhaseSetter {
-public:
-  HRSPhaseSetter(HRSPhase phase) {
-    HeapRegionSetBase::set_phase(phase);
-  }
-  ~HRSPhaseSetter() {
-    HeapRegionSetBase::clear_phase();
-  }
-};
-
-// These two macros are provided for convenience, to keep the uses of
-// these two asserts a bit more concise.
-
-#define hrs_assert_mt_safety_ok(_set_)                                        \
-  do {                                                                        \
-    assert((_set_)->check_mt_safety(), hrs_ext_msg((_set_), "MT safety"));    \
-  } while (0)
-
-#define hrs_assert_region_ok(_set_, _hr_, _expected_)                         \
-  do {                                                                        \
-    assert((_set_)->verify_region((_hr_), (_expected_)),                      \
-           hrs_ext_msg((_set_), "region verification"));                      \
-  } while (0)
-
-//////////////////// HeapRegionSet ////////////////////
-
 #define hrs_assert_sets_match(_set1_, _set2_)                                 \
   do {                                                                        \
     assert(((_set1_)->regions_humongous() ==                                  \
@@ -236,63 +182,41 @@
 // the same interface (namely, the HeapRegionSetBase API).

 class HeapRegionSet : public HeapRegionSetBase {
-protected:
-  virtual const char* verify_region_extra(HeapRegion* hr) {
-    if (hr->next() != NULL) {
-      return "next() should always be NULL as we do not link the regions";
-    }
-
-    return HeapRegionSetBase::verify_region_extra(hr);
-  }
-
-  HeapRegionSet(const char* name) : HeapRegionSetBase(name) {
-    clear();
-  }
-
 public:
-  // It adds hr to the set. The region should not be a member of
-  // another set.
-  inline void add(HeapRegion* hr);
+  HeapRegionSet(const char* name, bool humongous, HRSMtSafeChecker* mt_safety_checker):
+    HeapRegionSetBase(name, humongous, false /* empty */, mt_safety_checker) { }

-  // It removes hr from the set. The region should be a member of
-  // this set.
-  inline void remove(HeapRegion* hr);
-
-  // It removes a region from the set. Instead of updating the fields
-  // of the set to reflect this removal, it accumulates the updates
-  // in proxy_set. The idea is that proxy_set is thread-local to
-  // avoid multiple threads updating the fields of the set
-  // concurrently and having to synchronize. The method
-  // update_from_proxy() will update the fields of the set from the
-  // proxy_set.
-  inline void remove_with_proxy(HeapRegion* hr, HeapRegionSet* proxy_set);
-
-  // After multiple calls to remove_with_proxy() the updates to the
-  // fields of the set are accumulated in proxy_set. This call
-  // updates the fields of the set from proxy_set.
-  void update_from_proxy(HeapRegionSet* proxy_set);
+  void bulk_remove(const HeapRegionSetCount& removed) {
+    _count.decrement(removed.length(), removed.capacity());
+  }
 };

-//////////////////// HeapRegionLinkedList ////////////////////
-
-// A set that links all the regions added to it in a singly-linked
+// A set that links all the regions added to it in a doubly-linked
 // list. We should try to avoid doing operations that iterate over
 // such lists in performance critical paths. Typically we should
-// add / remove one region at a time or concatenate two lists. All
-// those operations are done in constant time.
+// add / remove one region at a time or concatenate two lists. There are
+// two ways to treat your lists, ordered and un-ordered. All un-ordered
+// operations are done in constant time. To keep a list ordered only use
+// add_ordered() to add elements to the list. If a list is not ordered
+// from start, there is no way to sort it later.

-class HeapRegionLinkedListIterator;
+class FreeRegionListIterator;

-class HeapRegionLinkedList : public HeapRegionSetBase {
-  friend class HeapRegionLinkedListIterator;
+class FreeRegionList : public HeapRegionSetBase {
+  friend class FreeRegionListIterator;

 private:
   HeapRegion* _head;
   HeapRegion* _tail;

-  // These are provided for use by the friend classes.
-  HeapRegion* head() { return _head; }
-  HeapRegion* tail() { return _tail; }
+  // _last is used to keep track of where we added an element the last
+  // time in ordered lists. It helps to improve performance when adding
+  // several ordered items in a row.
+  HeapRegion* _last;
+
+  static uint _unrealistically_long_length;
+
+  void add_as_head_or_tail(FreeRegionList* from_list, bool as_head);

 protected:
   virtual void fill_in_ext_msg_extra(hrs_ext_msg* msg);
@@ -300,11 +224,24 @@
   // See the comment for HeapRegionSetBase::clear()
   virtual void clear();

-  HeapRegionLinkedList(const char* name) : HeapRegionSetBase(name) {
+public:
+  FreeRegionList(const char* name, HRSMtSafeChecker* mt_safety_checker = NULL):
+    HeapRegionSetBase(name, false /* humongous */, true /* empty */, mt_safety_checker) {
     clear();
   }

-public:
+  void verify_list();
+
+  HeapRegion* head() { return _head; }
+  HeapRegion* tail() { return _tail; }
+
+  static void set_unrealistically_long_length(uint len);
+
+  // Add hr to the list. The region should not be a member of another set.
+  // Assumes that the list is ordered and will preserve that order. The order
+  // is determined by hrs_index.
+  inline void add_ordered(HeapRegion* hr);
+
   // It adds hr to the list as the new head. The region should not be
   // a member of another set.
   inline void add_as_head(HeapRegion* hr);
@@ -320,15 +257,29 @@
   // Convenience method.
   inline HeapRegion* remove_head_or_null();

+  // Removes and returns the last element (_tail) of the list. It assumes
+  // that the list isn't empty so that it can return a non-NULL value.
+  inline HeapRegion* remove_tail();
+
+  // Convenience method
+  inline HeapRegion* remove_tail_or_null();
+
+  // Removes from head or tail based on the given argument.
+  inline HeapRegion* remove_region(bool from_head);
+
+  // Merge two ordered lists. The result is also ordered. The order is
+  // determined by hrs_index.
+  void add_ordered(FreeRegionList* from_list);
+
   // It moves the regions from from_list to this list and empties
   // from_list. The new regions will appear in the same order as they
   // were in from_list and be linked in the beginning of this list.
-  void add_as_head(HeapRegionLinkedList* from_list);
+  void add_as_head(FreeRegionList* from_list);

   // It moves the regions from from_list to this list and empties
   // from_list. The new regions will appear in the same order as they
   // were in from_list and be linked in the end of this list.
-  void add_as_tail(HeapRegionLinkedList* from_list);
+  void add_as_tail(FreeRegionList* from_list);

   // It empties the list by removing all regions from it.
   void remove_all();
@@ -346,15 +297,13 @@
   virtual void print_on(outputStream* out, bool print_contents = false);
 };

-//////////////////// HeapRegionLinkedListIterator ////////////////////
-
 // Iterator class that provides a convenient way to iterate over the
 // regions of a HeapRegionLinkedList instance.

-class HeapRegionLinkedListIterator : public StackObj {
+class FreeRegionListIterator : public StackObj {
 private:
-  HeapRegionLinkedList* _list;
-  HeapRegion*           _curr;
+  FreeRegionList* _list;
+  HeapRegion*     _curr;

 public:
   bool more_available() {
@@ -369,13 +318,12 @@
     // do the "cycle" check.

     HeapRegion* hr = _curr;
-    assert(_list->verify_region(hr, _list), "region verification");
+    _list->verify_region(hr);
     _curr = hr->next();
     return hr;
   }

-  HeapRegionLinkedListIterator(HeapRegionLinkedList* list)
-    : _curr(NULL), _list(list) {
+  FreeRegionListIterator(FreeRegionList* list) : _curr(NULL), _list(list) {
     _curr = list->head();
   }
 };
--- a/src/share/vm/gc_implementation/g1/heapRegionSet.inline.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionSet.inline.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,116 +27,110 @@

 #include "gc_implementation/g1/heapRegionSet.hpp"

-//////////////////// HeapRegionSetBase ////////////////////
-
-inline void HeapRegionSetBase::update_for_addition(HeapRegion* hr) {
-  // Assumes the caller has already verified the region.
-
-  _length           += 1;
-  _region_num       += hr->region_num();
-  _total_used_bytes += hr->used();
-}
-
-inline void HeapRegionSetBase::add_internal(HeapRegion* hr) {
-  hrs_assert_region_ok(this, hr, NULL);
+inline void HeapRegionSetBase::add(HeapRegion* hr) {
+  check_mt_safety();
+  assert(hr->containing_set() == NULL, hrs_ext_msg(this, "should not already have a containing set %u"));
   assert(hr->next() == NULL, hrs_ext_msg(this, "should not already be linked"));

-  update_for_addition(hr);
+  _count.increment(1u, hr->capacity());
   hr->set_containing_set(this);
+  verify_region(hr);
 }

-inline void HeapRegionSetBase::update_for_removal(HeapRegion* hr) {
-  // Assumes the caller has already verified the region.
-  assert(_length > 0, hrs_ext_msg(this, "pre-condition"));
-  _length -= 1;
-
-  uint region_num_diff = hr->region_num();
-  assert(region_num_diff <= _region_num,
-         hrs_err_msg("[%s] region's region num: %u "
-                     "should be <= region num: %u",
-                     name(), region_num_diff, _region_num));
-  _region_num -= region_num_diff;
-
-  size_t used_bytes = hr->used();
-  assert(used_bytes <= _total_used_bytes,
-         hrs_err_msg("[%s] region's used bytes: "SIZE_FORMAT" "
-                     "should be <= used bytes: "SIZE_FORMAT,
-                     name(), used_bytes, _total_used_bytes));
-  _total_used_bytes -= used_bytes;
-}
-
-inline void HeapRegionSetBase::remove_internal(HeapRegion* hr) {
-  hrs_assert_region_ok(this, hr, this);
+inline void HeapRegionSetBase::remove(HeapRegion* hr) {
+  check_mt_safety();
+  verify_region(hr);
   assert(hr->next() == NULL, hrs_ext_msg(this, "should already be unlinked"));

   hr->set_containing_set(NULL);
-  update_for_removal(hr);
-}
-
-//////////////////// HeapRegionSet ////////////////////
-
-inline void HeapRegionSet::add(HeapRegion* hr) {
-  hrs_assert_mt_safety_ok(this);
-  // add_internal() will verify the region.
-  add_internal(hr);
-}
-
-inline void HeapRegionSet::remove(HeapRegion* hr) {
-  hrs_assert_mt_safety_ok(this);
-  // remove_internal() will verify the region.
-  remove_internal(hr);
+  assert(_count.length() > 0, hrs_ext_msg(this, "pre-condition"));
+  _count.decrement(1u, hr->capacity());
 }

-inline void HeapRegionSet::remove_with_proxy(HeapRegion* hr,
-                                             HeapRegionSet* proxy_set) {
-  // No need to fo the MT safety check here given that this method
-  // does not update the contents of the set but instead accumulates
-  // the changes in proxy_set which is assumed to be thread-local.
-  hrs_assert_sets_match(this, proxy_set);
-  hrs_assert_region_ok(this, hr, this);
-
-  hr->set_containing_set(NULL);
-  proxy_set->update_for_addition(hr);
-}
-
-//////////////////// HeapRegionLinkedList ////////////////////
-
-inline void HeapRegionLinkedList::add_as_head(HeapRegion* hr) {
-  hrs_assert_mt_safety_ok(this);
+inline void FreeRegionList::add_ordered(HeapRegion* hr) {
+  check_mt_safety();
   assert((length() == 0 && _head == NULL && _tail == NULL) ||
          (length() >  0 && _head != NULL && _tail != NULL),
          hrs_ext_msg(this, "invariant"));
-  // add_internal() will verify the region.
-  add_internal(hr);
+  // add() will verify the region and check mt safety.
+  add(hr);
+
+  // Now link the region
+  if (_head != NULL) {
+    HeapRegion* curr;
+
+    if (_last != NULL && _last->hrs_index() < hr->hrs_index()) {
+      curr = _last;
+    } else {
+      curr = _head;
+    }
+
+    // Find first entry with a Region Index larger than entry to insert.
+    while (curr != NULL && curr->hrs_index() < hr->hrs_index()) {
+      curr = curr->next();
+    }
+
+    hr->set_next(curr);
+
+    if (curr == NULL) {
+      // Adding at the end
+      hr->set_prev(_tail);
+      _tail->set_next(hr);
+      _tail = hr;
+    } else if (curr->prev() == NULL) {
+      // Adding at the beginning
+      hr->set_prev(NULL);
+      _head = hr;
+      curr->set_prev(hr);
+    } else {
+      hr->set_prev(curr->prev());
+      hr->prev()->set_next(hr);
+      curr->set_prev(hr);
+    }
+  } else {
+    // The list was empty
+    _tail = hr;
+    _head = hr;
+  }
+  _last = hr;
+}
+
+inline void FreeRegionList::add_as_head(HeapRegion* hr) {
+  assert((length() == 0 && _head == NULL && _tail == NULL) ||
+         (length() >  0 && _head != NULL && _tail != NULL),
+         hrs_ext_msg(this, "invariant"));
+  // add() will verify the region and check mt safety.
+  add(hr);

   // Now link the region.
   if (_head != NULL) {
     hr->set_next(_head);
+    _head->set_prev(hr);
   } else {
     _tail = hr;
   }
   _head = hr;
 }

-inline void HeapRegionLinkedList::add_as_tail(HeapRegion* hr) {
-  hrs_assert_mt_safety_ok(this);
+inline void FreeRegionList::add_as_tail(HeapRegion* hr) {
+  check_mt_safety();
   assert((length() == 0 && _head == NULL && _tail == NULL) ||
          (length() >  0 && _head != NULL && _tail != NULL),
          hrs_ext_msg(this, "invariant"));
-  // add_internal() will verify the region.
-  add_internal(hr);
+  // add() will verify the region and check mt safety.
+  add(hr);

   // Now link the region.
   if (_tail != NULL) {
     _tail->set_next(hr);
+    hr->set_prev(_tail);
   } else {
     _head = hr;
   }
   _tail = hr;
 }

-inline HeapRegion* HeapRegionLinkedList::remove_head() {
-  hrs_assert_mt_safety_ok(this);
+inline HeapRegion* FreeRegionList::remove_head() {
   assert(!is_empty(), hrs_ext_msg(this, "the list should not be empty"));
   assert(length() > 0 && _head != NULL && _tail != NULL,
          hrs_ext_msg(this, "invariant"));
@@ -146,17 +140,22 @@
   _head = hr->next();
   if (_head == NULL) {
     _tail = NULL;
+  } else {
+    _head->set_prev(NULL);
   }
   hr->set_next(NULL);

-  // remove_internal() will verify the region.
-  remove_internal(hr);
+  if (_last == hr) {
+    _last = NULL;
+  }
+
+  // remove() will verify the region and check mt safety.
+  remove(hr);
   return hr;
 }

-inline HeapRegion* HeapRegionLinkedList::remove_head_or_null() {
-  hrs_assert_mt_safety_ok(this);
-
+inline HeapRegion* FreeRegionList::remove_head_or_null() {
+  check_mt_safety();
   if (!is_empty()) {
     return remove_head();
   } else {
@@ -164,4 +163,47 @@
   }
 }

+inline HeapRegion* FreeRegionList::remove_tail() {
+  assert(!is_empty(), hrs_ext_msg(this, "The list should not be empty"));
+  assert(length() > 0 && _head != NULL && _tail != NULL,
+         hrs_ext_msg(this, "invariant"));
+
+  // We need to unlink it first
+  HeapRegion* hr = _tail;
+
+  _tail = hr->prev();
+  if (_tail == NULL) {
+    _head = NULL;
+  } else {
+    _tail->set_next(NULL);
+  }
+  hr->set_prev(NULL);
+
+  if (_last == hr) {
+    _last = NULL;
+  }
+
+  // remove() will verify the region and check mt safety.
+  remove(hr);
+  return hr;
+}
+
+inline HeapRegion* FreeRegionList::remove_tail_or_null() {
+  check_mt_safety();
+
+  if (!is_empty()) {
+    return remove_tail();
+  } else {
+    return NULL;
+  }
+}
+
+inline HeapRegion* FreeRegionList::remove_region(bool from_head) {
+  if (from_head) {
+    return remove_head_or_null();
+  } else {
+    return remove_tail_or_null();
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONSET_INLINE_HPP
--- a/src/share/vm/gc_implementation/g1/heapRegionSets.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,175 +0,0 @@
-/*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "gc_implementation/g1/heapRegionRemSet.hpp"
-#include "gc_implementation/g1/heapRegionSets.hpp"
-
-// Note on the check_mt_safety() methods below:
-//
-// Verification of the "master" heap region sets / lists that are
-// maintained by G1CollectedHeap is always done during a STW pause and
-// by the VM thread at the start / end of the pause. The standard
-// verification methods all assert check_mt_safety(). This is
-// important as it ensures that verification is done without
-// concurrent updates taking place at the same time. It follows, that,
-// for the "master" heap region sets / lists, the check_mt_safety()
-// method should include the VM thread / STW case.
-
-//////////////////// FreeRegionList ////////////////////
-
-const char* FreeRegionList::verify_region_extra(HeapRegion* hr) {
-  if (hr->is_young()) {
-    return "the region should not be young";
-  }
-  // The superclass will check that the region is empty and
-  // not humongous.
-  return HeapRegionLinkedList::verify_region_extra(hr);
-}
-
-//////////////////// MasterFreeRegionList ////////////////////
-
-const char* MasterFreeRegionList::verify_region_extra(HeapRegion* hr) {
-  // We should reset the RSet for parallel iteration before we add it
-  // to the master free list so that it is ready when the region is
-  // re-allocated.
-  if (!hr->rem_set()->verify_ready_for_par_iteration()) {
-    return "the region's RSet should be ready for parallel iteration";
-  }
-  return FreeRegionList::verify_region_extra(hr);
-}
-
-bool MasterFreeRegionList::check_mt_safety() {
-  // Master Free List MT safety protocol:
-  // (a) If we're at a safepoint, operations on the master free list
-  // should be invoked by either the VM thread (which will serialize
-  // them) or by the GC workers while holding the
-  // FreeList_lock.
-  // (b) If we're not at a safepoint, operations on the master free
-  // list should be invoked while holding the Heap_lock.
-
-  if (SafepointSynchronize::is_at_safepoint()) {
-    guarantee(Thread::current()->is_VM_thread() ||
-              FreeList_lock->owned_by_self(),
-              hrs_ext_msg(this, "master free list MT safety protocol "
-                                "at a safepoint"));
-  } else {
-    guarantee(Heap_lock->owned_by_self(),
-              hrs_ext_msg(this, "master free list MT safety protocol "
-                                "outside a safepoint"));
-  }
-
-  return FreeRegionList::check_mt_safety();
-}
-
-//////////////////// SecondaryFreeRegionList ////////////////////
-
-bool SecondaryFreeRegionList::check_mt_safety() {
-  // Secondary Free List MT safety protocol:
-  // Operations on the secondary free list should always be invoked
-  // while holding the SecondaryFreeList_lock.
-
-  guarantee(SecondaryFreeList_lock->owned_by_self(),
-            hrs_ext_msg(this, "secondary free list MT safety protocol"));
-
-  return FreeRegionList::check_mt_safety();
-}
-
-//////////////////// OldRegionSet ////////////////////
-
-const char* OldRegionSet::verify_region_extra(HeapRegion* hr) {
-  if (hr->is_young()) {
-    return "the region should not be young";
-  }
-  // The superclass will check that the region is not empty and not
-  // humongous.
-  return HeapRegionSet::verify_region_extra(hr);
-}
-
-//////////////////// MasterOldRegionSet ////////////////////
-
-bool MasterOldRegionSet::check_mt_safety() {
-  // Master Old Set MT safety protocol:
-  // (a) If we're at a safepoint, operations on the master old set
-  // should be invoked:
-  // - by the VM thread (which will serialize them), or
-  // - by the GC workers while holding the FreeList_lock, if we're
-  //   at a safepoint for an evacuation pause (this lock is taken
-  //   anyway when an GC alloc region is retired so that a new one
-  //   is allocated from the free list), or
-  // - by the GC workers while holding the OldSets_lock, if we're at a
-  //   safepoint for a cleanup pause.
-  // (b) If we're not at a safepoint, operations on the master old set
-  // should be invoked while holding the Heap_lock.
-
-  if (SafepointSynchronize::is_at_safepoint()) {
-    guarantee(Thread::current()->is_VM_thread() ||
-              _phase == HRSPhaseEvacuation && FreeList_lock->owned_by_self() ||
-              _phase == HRSPhaseCleanup && OldSets_lock->owned_by_self(),
-              hrs_ext_msg(this, "master old set MT safety protocol "
-                                "at a safepoint"));
-  } else {
-    guarantee(Heap_lock->owned_by_self(),
-              hrs_ext_msg(this, "master old set MT safety protocol "
-                                "outside a safepoint"));
-  }
-
-  return OldRegionSet::check_mt_safety();
-}
-
-//////////////////// HumongousRegionSet ////////////////////
-
-const char* HumongousRegionSet::verify_region_extra(HeapRegion* hr) {
-  if (hr->is_young()) {
-    return "the region should not be young";
-  }
-  // The superclass will check that the region is not empty and
-  // humongous.
-  return HeapRegionSet::verify_region_extra(hr);
-}
-
-//////////////////// MasterHumongousRegionSet ////////////////////
-
-bool MasterHumongousRegionSet::check_mt_safety() {
-  // Master Humongous Set MT safety protocol:
-  // (a) If we're at a safepoint, operations on the master humongous
-  // set should be invoked by either the VM thread (which will
-  // serialize them) or by the GC workers while holding the
-  // OldSets_lock.
-  // (b) If we're not at a safepoint, operations on the master
-  // humongous set should be invoked while holding the Heap_lock.
-
-  if (SafepointSynchronize::is_at_safepoint()) {
-    guarantee(Thread::current()->is_VM_thread() ||
-              OldSets_lock->owned_by_self(),
-              hrs_ext_msg(this, "master humongous set MT safety protocol "
-                                "at a safepoint"));
-  } else {
-    guarantee(Heap_lock->owned_by_self(),
-              hrs_ext_msg(this, "master humongous set MT safety protocol "
-                                "outside a safepoint"));
-  }
-
-  return HumongousRegionSet::check_mt_safety();
-}
--- a/src/share/vm/gc_implementation/g1/heapRegionSets.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONSETS_HPP
-#define SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONSETS_HPP
-
-#include "gc_implementation/g1/heapRegionSet.inline.hpp"
-
-//////////////////// FreeRegionList ////////////////////
-
-class FreeRegionList : public HeapRegionLinkedList {
-protected:
-  virtual const char* verify_region_extra(HeapRegion* hr);
-
-  virtual bool regions_humongous() { return false; }
-  virtual bool regions_empty()     { return true;  }
-
-public:
-  FreeRegionList(const char* name) : HeapRegionLinkedList(name) { }
-};
-
-//////////////////// MasterFreeRegionList ////////////////////
-
-class MasterFreeRegionList : public FreeRegionList {
-protected:
-  virtual const char* verify_region_extra(HeapRegion* hr);
-  virtual bool check_mt_safety();
-
-public:
-  MasterFreeRegionList(const char* name) : FreeRegionList(name) { }
-};
-
-//////////////////// SecondaryFreeRegionList ////////////////////
-
-class SecondaryFreeRegionList : public FreeRegionList {
-protected:
-  virtual bool check_mt_safety();
-
-public:
-  SecondaryFreeRegionList(const char* name) : FreeRegionList(name) { }
-};
-
-//////////////////// OldRegionSet ////////////////////
-
-class OldRegionSet : public HeapRegionSet {
-protected:
-  virtual const char* verify_region_extra(HeapRegion* hr);
-
-  virtual bool regions_humongous() { return false; }
-  virtual bool regions_empty()     { return false; }
-
-public:
-  OldRegionSet(const char* name) : HeapRegionSet(name) { }
-};
-
-//////////////////// MasterOldRegionSet ////////////////////
-
-class MasterOldRegionSet : public OldRegionSet {
-private:
-protected:
-  virtual bool check_mt_safety();
-
-public:
-  MasterOldRegionSet(const char* name) : OldRegionSet(name) { }
-};
-
-//////////////////// HumongousRegionSet ////////////////////
-
-class HumongousRegionSet : public HeapRegionSet {
-protected:
-  virtual const char* verify_region_extra(HeapRegion* hr);
-
-  virtual bool regions_humongous() { return true;  }
-  virtual bool regions_empty()     { return false; }
-
-public:
-  HumongousRegionSet(const char* name) : HeapRegionSet(name) { }
-};
-
-//////////////////// MasterHumongousRegionSet ////////////////////
-
-class MasterHumongousRegionSet : public HumongousRegionSet {
-protected:
-  virtual bool check_mt_safety();
-
-public:
-  MasterHumongousRegionSet(const char* name) : HumongousRegionSet(name) { }
-};
-
-#endif // SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONSETS_HPP
--- a/src/share/vm/gc_implementation/g1/satbQueue.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -219,58 +219,52 @@
 }

 #ifdef ASSERT
-void SATBMarkQueueSet::dump_active_values(JavaThread* first,
-                                          bool expected_active) {
-  gclog_or_tty->print_cr("SATB queue active values for Java Threads");
-  gclog_or_tty->print_cr(" SATB queue set: active is %s",
-                         (is_active()) ? "TRUE" : "FALSE");
-  gclog_or_tty->print_cr(" expected_active is %s",
-                         (expected_active) ? "TRUE" : "FALSE");
-  for (JavaThread* t = first; t; t = t->next()) {
-    bool active = t->satb_mark_queue().is_active();
-    gclog_or_tty->print_cr("  thread %s, active is %s",
-                           t->name(), (active) ? "TRUE" : "FALSE");
+void SATBMarkQueueSet::dump_active_states(bool expected_active) {
+  gclog_or_tty->print_cr("Expected SATB active state: %s",
+                         expected_active ? "ACTIVE" : "INACTIVE");
+  gclog_or_tty->print_cr("Actual SATB active states:");
+  gclog_or_tty->print_cr("  Queue set: %s", is_active() ? "ACTIVE" : "INACTIVE");
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    gclog_or_tty->print_cr("  Thread \"%s\" queue: %s", t->name(),
+                           t->satb_mark_queue().is_active() ? "ACTIVE" : "INACTIVE");
+  }
+  gclog_or_tty->print_cr("  Shared queue: %s",
+                         shared_satb_queue()->is_active() ? "ACTIVE" : "INACTIVE");
+}
+
+void SATBMarkQueueSet::verify_active_states(bool expected_active) {
+  // Verify queue set state
+  if (is_active() != expected_active) {
+    dump_active_states(expected_active);
+    guarantee(false, "SATB queue set has an unexpected active state");
+  }
+
+  // Verify thread queue states
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    if (t->satb_mark_queue().is_active() != expected_active) {
+      dump_active_states(expected_active);
+      guarantee(false, "Thread SATB queue has an unexpected active state");
+    }
+  }
+
+  // Verify shared queue state
+  if (shared_satb_queue()->is_active() != expected_active) {
+    dump_active_states(expected_active);
+    guarantee(false, "Shared SATB queue has an unexpected active state");
   }
 }
 #endif // ASSERT

-void SATBMarkQueueSet::set_active_all_threads(bool b,
-                                              bool expected_active) {
+void SATBMarkQueueSet::set_active_all_threads(bool active, bool expected_active) {
   assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
-  JavaThread* first = Threads::first();
-
 #ifdef ASSERT
-  if (_all_active != expected_active) {
-    dump_active_values(first, expected_active);
-
-    // I leave this here as a guarantee, instead of an assert, so
-    // that it will still be compiled in if we choose to uncomment
-    // the #ifdef ASSERT in a product build. The whole block is
-    // within an #ifdef ASSERT so the guarantee will not be compiled
-    // in a product build anyway.
-    guarantee(false,
-              "SATB queue set has an unexpected active value");
-  }
+  verify_active_states(expected_active);
 #endif // ASSERT
-  _all_active = b;
-
-  for (JavaThread* t = first; t; t = t->next()) {
-#ifdef ASSERT
-    bool active = t->satb_mark_queue().is_active();
-    if (active != expected_active) {
-      dump_active_values(first, expected_active);
-
-      // I leave this here as a guarantee, instead of an assert, so
-      // that it will still be compiled in if we choose to uncomment
-      // the #ifdef ASSERT in a product build. The whole block is
-      // within an #ifdef ASSERT so the guarantee will not be compiled
-      // in a product build anyway.
-      guarantee(false,
-                "thread has an unexpected active value in its SATB queue");
-    }
-#endif // ASSERT
-    t->satb_mark_queue().set_active(b);
+  _all_active = active;
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().set_active(active);
   }
+  shared_satb_queue()->set_active(active);
 }

 void SATBMarkQueueSet::filter_thread_buffers() {
--- a/src/share/vm/gc_implementation/g1/satbQueue.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -87,7 +87,8 @@
   bool apply_closure_to_completed_buffer_work(bool par, int worker);

 #ifdef ASSERT
-  void dump_active_values(JavaThread* first, bool expected_active);
+  void dump_active_states(bool expected_active);
+  void verify_active_states(bool expected_active);
 #endif // ASSERT

 public:
@@ -99,11 +100,11 @@

   static void handle_zero_index_for_thread(JavaThread* t);

-  // Apply "set_active(b)" to all Java threads' SATB queues. It should be
+  // Apply "set_active(active)" to all SATB queues in the set. It should be
   // called only with the world stopped. The method will assert that the
   // SATB queues of all threads it visits, as well as the SATB queue
   // set itself, has an active value same as expected_active.
-  void set_active_all_threads(bool b, bool expected_active);
+  void set_active_all_threads(bool active, bool expected_active);

   // Filter all the currently-active SATB buffers.
   void filter_thread_buffers();
--- a/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -57,9 +57,10 @@
   nonstatic_field(G1MonitoringSupport, _old_committed,      size_t)           \
   nonstatic_field(G1MonitoringSupport, _old_used,           size_t)           \
                                                                               \
-  nonstatic_field(HeapRegionSetBase,   _length,             uint)             \
-  nonstatic_field(HeapRegionSetBase,   _region_num,         uint)             \
-  nonstatic_field(HeapRegionSetBase,   _total_used_bytes,   size_t)           \
+  nonstatic_field(HeapRegionSetBase,   _count,          HeapRegionSetCount)   \
+                                                                              \
+  nonstatic_field(HeapRegionSetCount,  _length,         uint)                 \
+  nonstatic_field(HeapRegionSetCount,  _capacity,       size_t)               \


 #define VM_TYPES_G1(declare_type, declare_toplevel_type)                      \
@@ -71,6 +72,7 @@
   declare_type(HeapRegion, ContiguousSpace)                                   \
   declare_toplevel_type(HeapRegionSeq)                                        \
   declare_toplevel_type(HeapRegionSetBase)                                    \
+  declare_toplevel_type(HeapRegionSetCount)                                   \
   declare_toplevel_type(G1MonitoringSupport)                                  \
                                                                               \
   declare_toplevel_type(G1CollectedHeap*)                                     \
--- a/src/share/vm/gc_implementation/parallelScavenge/objectStartArray.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/objectStartArray.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -139,11 +139,6 @@
       return true;
     }
   }
-  // No object starts in this slice; verify this using
-  // more traditional methods:  Note that no object can
-  // start before the start_addr.
-  assert(end_addr == start_addr ||
-         object_start(end_addr - 1) <= start_addr,
-         "Oops an object does start in this slice?");
+
   return false;
 }
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -492,6 +492,10 @@
   return young_gen()->eden_space()->tlab_capacity(thr);
 }

+size_t ParallelScavengeHeap::tlab_used(Thread* thr) const {
+  return young_gen()->eden_space()->tlab_used(thr);
+}
+
 size_t ParallelScavengeHeap::unsafe_max_tlab_alloc(Thread* thr) const {
   return young_gen()->eden_space()->unsafe_max_tlab_alloc(thr);
 }
@@ -665,8 +669,10 @@

 void ParallelScavengeHeap::trace_heap(GCWhen::Type when, GCTracer* gc_tracer) {
   const PSHeapSummary& heap_summary = create_ps_heap_summary();
+  gc_tracer->report_gc_heap_summary(when, heap_summary);
+
   const MetaspaceSummary& metaspace_summary = create_metaspace_summary();
-  gc_tracer->report_gc_heap_summary(when, heap_summary, metaspace_summary);
+  gc_tracer->report_metaspace_summary(when, metaspace_summary);
 }

 ParallelScavengeHeap* ParallelScavengeHeap::heap() {
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -189,6 +189,7 @@
   bool supports_tlab_allocation() const { return true; }

   size_t tlab_capacity(Thread* thr) const;
+  size_t tlab_used(Thread* thr) const;
   size_t unsafe_max_tlab_alloc(Thread* thr) const;

   // Can a compiler initialize a new object without store barriers?
--- a/src/share/vm/gc_implementation/shared/gcHeapSummary.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/gcHeapSummary.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_SHARED_GCHEAPSUMMARY_HPP

 #include "memory/allocation.hpp"
+#include "memory/metaspaceChunkFreeListSummary.hpp"

 class VirtualSpaceSummary : public StackObj {
   HeapWord* _start;
@@ -125,18 +126,49 @@
 };

 class MetaspaceSummary : public StackObj {
+  size_t _capacity_until_GC;
   MetaspaceSizes _meta_space;
   MetaspaceSizes _data_space;
   MetaspaceSizes _class_space;
+  MetaspaceChunkFreeListSummary _metaspace_chunk_free_list_summary;
+  MetaspaceChunkFreeListSummary _class_chunk_free_list_summary;

  public:
-  MetaspaceSummary() : _meta_space(), _data_space(), _class_space() {}
-  MetaspaceSummary(const MetaspaceSizes& meta_space, const MetaspaceSizes& data_space, const MetaspaceSizes& class_space) :
-       _meta_space(meta_space), _data_space(data_space), _class_space(class_space) { }
+  MetaspaceSummary() :
+    _capacity_until_GC(0),
+    _meta_space(),
+    _data_space(),
+    _class_space(),
+    _metaspace_chunk_free_list_summary(),
+    _class_chunk_free_list_summary()
+  {}
+  MetaspaceSummary(size_t capacity_until_GC,
+                   const MetaspaceSizes& meta_space,
+                   const MetaspaceSizes& data_space,
+                   const MetaspaceSizes& class_space,
+                   const MetaspaceChunkFreeListSummary& metaspace_chunk_free_list_summary,
+                   const MetaspaceChunkFreeListSummary& class_chunk_free_list_summary) :
+    _capacity_until_GC(capacity_until_GC),
+    _meta_space(meta_space),
+    _data_space(data_space),
+    _class_space(class_space),
+    _metaspace_chunk_free_list_summary(metaspace_chunk_free_list_summary),
+    _class_chunk_free_list_summary(class_chunk_free_list_summary)
+  {}

+  size_t capacity_until_GC() const { return _capacity_until_GC; }
   const MetaspaceSizes& meta_space() const { return _meta_space; }
   const MetaspaceSizes& data_space() const { return _data_space; }
   const MetaspaceSizes& class_space() const { return _class_space; }
+
+  const MetaspaceChunkFreeListSummary& metaspace_chunk_free_list_summary() const {
+    return _metaspace_chunk_free_list_summary;
+  }
+
+  const MetaspaceChunkFreeListSummary& class_chunk_free_list_summary() const {
+    return _class_chunk_free_list_summary;
+  }
+
 };

 #endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_GCHEAPSUMMARY_HPP
--- a/src/share/vm/gc_implementation/shared/gcTrace.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/gcTrace.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -139,11 +139,21 @@
 }
 #endif // INCLUDE_SERVICES

-void GCTracer::report_gc_heap_summary(GCWhen::Type when, const GCHeapSummary& heap_summary, const MetaspaceSummary& meta_space_summary) const {
+void GCTracer::report_gc_heap_summary(GCWhen::Type when, const GCHeapSummary& heap_summary) const {
   assert_set_gc_id();

   send_gc_heap_summary_event(when, heap_summary);
-  send_meta_space_summary_event(when, meta_space_summary);
+}
+
+void GCTracer::report_metaspace_summary(GCWhen::Type when, const MetaspaceSummary& summary) const {
+  assert_set_gc_id();
+
+  send_meta_space_summary_event(when, summary);
+
+  send_metaspace_chunk_free_list_summary(when, Metaspace::NonClassType, summary.metaspace_chunk_free_list_summary());
+  if (UseCompressedClassPointers) {
+    send_metaspace_chunk_free_list_summary(when, Metaspace::ClassType, summary.class_chunk_free_list_summary());
+  }
 }

 void YoungGCTracer::report_gc_end_impl(const Ticks& timestamp, TimePartitions* time_partitions) {
--- a/src/share/vm/gc_implementation/shared/gcTrace.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/gcTrace.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -30,6 +30,7 @@
 #include "gc_implementation/shared/gcWhen.hpp"
 #include "gc_implementation/shared/copyFailedInfo.hpp"
 #include "memory/allocation.hpp"
+#include "memory/metaspace.hpp"
 #include "memory/referenceType.hpp"
 #if INCLUDE_ALL_GCS
 #include "gc_implementation/g1/g1YCTypes.hpp"
@@ -41,6 +42,7 @@

 class EvacuationInfo;
 class GCHeapSummary;
+class MetaspaceChunkFreeListSummary;
 class MetaspaceSummary;
 class PSHeapSummary;
 class ReferenceProcessorStats;
@@ -124,7 +126,8 @@
  public:
   void report_gc_start(GCCause::Cause cause, const Ticks& timestamp);
   void report_gc_end(const Ticks& timestamp, TimePartitions* time_partitions);
-  void report_gc_heap_summary(GCWhen::Type when, const GCHeapSummary& heap_summary, const MetaspaceSummary& meta_space_summary) const;
+  void report_gc_heap_summary(GCWhen::Type when, const GCHeapSummary& heap_summary) const;
+  void report_metaspace_summary(GCWhen::Type when, const MetaspaceSummary& metaspace_summary) const;
   void report_gc_reference_stats(const ReferenceProcessorStats& rp) const;
   void report_object_count_after_gc(BoolObjectClosure* object_filter) NOT_SERVICES_RETURN;
   bool has_reported_gc_start() const;
@@ -138,6 +141,7 @@
   void send_garbage_collection_event() const;
   void send_gc_heap_summary_event(GCWhen::Type when, const GCHeapSummary& heap_summary) const;
   void send_meta_space_summary_event(GCWhen::Type when, const MetaspaceSummary& meta_space_summary) const;
+  void send_metaspace_chunk_free_list_summary(GCWhen::Type when, Metaspace::MetadataType mdtype, const MetaspaceChunkFreeListSummary& summary) const;
   void send_reference_stats_event(ReferenceType type, size_t count) const;
   void send_phase_events(TimePartitions* time_partitions) const;
 };
--- a/src/share/vm/gc_implementation/shared/gcTraceSend.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/gcTraceSend.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -64,6 +64,30 @@
   }
 }

+void GCTracer::send_metaspace_chunk_free_list_summary(GCWhen::Type when, Metaspace::MetadataType mdtype,
+                                                      const MetaspaceChunkFreeListSummary& summary) const {
+  EventMetaspaceChunkFreeListSummary e;
+  if (e.should_commit()) {
+    e.set_gcId(_shared_gc_info.id());
+    e.set_when(when);
+    e.set_metadataType(mdtype);
+
+    e.set_specializedChunks(summary.num_specialized_chunks());
+    e.set_specializedChunksTotalSize(summary.specialized_chunks_size_in_bytes());
+
+    e.set_smallChunks(summary.num_small_chunks());
+    e.set_smallChunksTotalSize(summary.small_chunks_size_in_bytes());
+
+    e.set_mediumChunks(summary.num_medium_chunks());
+    e.set_mediumChunksTotalSize(summary.medium_chunks_size_in_bytes());
+
+    e.set_humongousChunks(summary.num_humongous_chunks());
+    e.set_humongousChunksTotalSize(summary.humongous_chunks_size_in_bytes());
+
+    e.commit();
+  }
+}
+
 void ParallelOldTracer::send_parallel_old_event() const {
   EventGCParallelOld e(UNTIMED);
   if (e.should_commit()) {
@@ -246,6 +270,7 @@
   if (e.should_commit()) {
     e.set_gcId(_shared_gc_info.id());
     e.set_when((u1) when);
+    e.set_gcThreshold(meta_space_summary.capacity_until_GC());
     e.set_metaspace(to_trace_struct(meta_space_summary.meta_space()));
     e.set_dataSpace(to_trace_struct(meta_space_summary.data_space()));
     e.set_classSpace(to_trace_struct(meta_space_summary.class_space()));
--- a/src/share/vm/gc_implementation/shared/markSweep.inline.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/markSweep.inline.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -30,10 +30,18 @@
 #include "utilities/stack.inline.hpp"
 #include "utilities/macros.hpp"
 #if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1StringDedup.hpp"
 #include "gc_implementation/parallelScavenge/psParallelCompact.hpp"
 #endif // INCLUDE_ALL_GCS

 inline void MarkSweep::mark_object(oop obj) {
+#if INCLUDE_ALL_GCS
+  if (G1StringDedup::is_enabled()) {
+    // We must enqueue the object before it is marked
+    // as we otherwise can't read the object's age.
+    G1StringDedup::enqueue_from_mark(obj);
+  }
+#endif
   // some marks may contain information we need to preserve so we store them away
   // and overwrite the mark.  We'll restore it at the end of markSweep.
   markOop mark = obj->mark();
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -173,6 +173,26 @@
   return lgrp_spaces()->at(i)->space()->capacity_in_bytes();
 }

+size_t MutableNUMASpace::tlab_used(Thread *thr) const {
+  // Please see the comments for tlab_capacity().
+  guarantee(thr != NULL, "No thread");
+  int lgrp_id = thr->lgrp_id();
+  if (lgrp_id == -1) {
+    if (lgrp_spaces()->length() > 0) {
+      return (used_in_bytes()) / lgrp_spaces()->length();
+    } else {
+      assert(false, "There should be at least one locality group");
+      return 0;
+    }
+  }
+  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
+  if (i == -1) {
+    return 0;
+  }
+  return lgrp_spaces()->at(i)->space()->used_in_bytes();
+}
+
+
 size_t MutableNUMASpace::unsafe_max_tlab_alloc(Thread *thr) const {
   // Please see the comments for tlab_capacity().
   guarantee(thr != NULL, "No thread");
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -217,6 +217,7 @@
   using MutableSpace::capacity_in_words;
   virtual size_t capacity_in_words(Thread* thr) const;
   virtual size_t tlab_capacity(Thread* thr) const;
+  virtual size_t tlab_used(Thread* thr) const;
   virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;

   // Allocation (return NULL if full)
--- a/src/share/vm/gc_implementation/shared/mutableSpace.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/mutableSpace.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -124,6 +124,7 @@
   virtual size_t used_in_words() const                    { return pointer_delta(top(), bottom()); }
   virtual size_t free_in_words() const                    { return pointer_delta(end(),    top()); }
   virtual size_t tlab_capacity(Thread* thr) const         { return capacity_in_bytes();            }
+  virtual size_t tlab_used(Thread* thr) const             { return used_in_bytes();                }
   virtual size_t unsafe_max_tlab_alloc(Thread* thr) const { return free_in_bytes();                }

   // Allocation (return NULL if full)
--- a/src/share/vm/gc_implementation/shared/parGCAllocBuffer.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -89,6 +89,10 @@
 // scavenge; it clears the sensor accumulators.
 void PLABStats::adjust_desired_plab_sz(uint no_of_gc_workers) {
   assert(ResizePLAB, "Not set");
+
+  assert(is_object_aligned(max_size()) && min_size() <= max_size(),
+         "PLAB clipping computation may be incorrect");
+
   if (_allocated == 0) {
     assert(_unused == 0,
            err_msg("Inconsistency in PLAB stats: "
--- a/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -181,16 +181,7 @@
     _used(0),
     _desired_plab_sz(desired_plab_sz_),
     _filter(wt)
-  {
-    size_t min_sz = min_size();
-    size_t max_sz = max_size();
-    size_t aligned_min_sz = align_object_size(min_sz);
-    size_t aligned_max_sz = align_object_size(max_sz);
-    assert(min_sz <= aligned_min_sz && max_sz >= aligned_max_sz &&
-           min_sz <= max_sz,
-           "PLAB clipping computation in adjust_desired_plab_sz()"
-           " may be incorrect");
-  }
+  { }

   static const size_t min_size() {
     return ParGCAllocBuffer::min_size();
--- a/src/share/vm/gc_interface/collectedHeap.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_interface/collectedHeap.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -97,7 +97,13 @@
       MetaspaceAux::allocated_used_bytes(Metaspace::ClassType),
       MetaspaceAux::reserved_bytes(Metaspace::ClassType));

-  return MetaspaceSummary(meta_space, data_space, class_space);
+  const MetaspaceChunkFreeListSummary& ms_chunk_free_list_summary =
+    MetaspaceAux::chunk_free_list_summary(Metaspace::NonClassType);
+  const MetaspaceChunkFreeListSummary& class_chunk_free_list_summary =
+    MetaspaceAux::chunk_free_list_summary(Metaspace::ClassType);
+
+  return MetaspaceSummary(MetaspaceGC::capacity_until_GC(), meta_space, data_space, class_space,
+                          ms_chunk_free_list_summary, class_chunk_free_list_summary);
 }

 void CollectedHeap::print_heap_before_gc() {
@@ -128,8 +134,10 @@

 void CollectedHeap::trace_heap(GCWhen::Type when, GCTracer* gc_tracer) {
   const GCHeapSummary& heap_summary = create_heap_summary();
+  gc_tracer->report_gc_heap_summary(when, heap_summary);
+
   const MetaspaceSummary& metaspace_summary = create_metaspace_summary();
-  gc_tracer->report_gc_heap_summary(when, heap_summary, metaspace_summary);
+  gc_tracer->report_metaspace_summary(when, metaspace_summary);
 }

 void CollectedHeap::trace_heap_before_gc(GCTracer* gc_tracer) {
@@ -320,6 +328,21 @@
   assert(thread->deferred_card_mark().is_empty(), "invariant");
 }

+size_t CollectedHeap::max_tlab_size() const {
+  // TLABs can't be bigger than we can fill with a int[Integer.MAX_VALUE].
+  // This restriction could be removed by enabling filling with multiple arrays.
+  // If we compute that the reasonable way as
+  //    header_size + ((sizeof(jint) * max_jint) / HeapWordSize)
+  // we'll overflow on the multiply, so we do the divide first.
+  // We actually lose a little by dividing first,
+  // but that just makes the TLAB  somewhat smaller than the biggest array,
+  // which is fine, since we'll be able to fill that.
+  size_t max_int_size = typeArrayOopDesc::header_size(T_INT) +
+              sizeof(jint) *
+              ((juint) max_jint / (size_t) HeapWordSize);
+  return align_size_down(max_int_size, MinObjAlignment);
+}
+
 // Helper for ReduceInitialCardMarks. For performance,
 // compiled code may elide card-marks for initializing stores
 // to a newly allocated object along the fast-path. We
--- a/src/share/vm/gc_interface/collectedHeap.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/gc_interface/collectedHeap.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -403,14 +403,16 @@
   // the following methods:
   // Returns "true" iff the heap supports thread-local allocation buffers.
   // The default is "no".
-  virtual bool supports_tlab_allocation() const {
-    return false;
-  }
+  virtual bool supports_tlab_allocation() const = 0;
+
   // The amount of space available for thread-local allocation buffers.
-  virtual size_t tlab_capacity(Thread *thr) const {
-    guarantee(false, "thread-local allocation buffers not supported");
-    return 0;
-  }
+  virtual size_t tlab_capacity(Thread *thr) const = 0;
+
+  // The amount of used space for thread-local allocation buffers for the given thread.
+  virtual size_t tlab_used(Thread *thr) const = 0;
+
+  virtual size_t max_tlab_size() const;
+
   // An estimate of the maximum allocation that could be performed
   // for thread-local allocation buffers without triggering any
   // collection or expansion activity.
--- a/src/share/vm/interpreter/bytecodeTracer.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/interpreter/bytecodeTracer.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -596,7 +596,7 @@
     if (data != NULL) {
       st->print("  %d", mdo->dp_to_di(data->dp()));
       st->fill_to(6);
-      data->print_data_on(st);
+      data->print_data_on(st, mdo);
     }
   }
 }
--- a/src/share/vm/memory/defNewGeneration.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/memory/defNewGeneration.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1086,6 +1086,10 @@
   return eden()->capacity();
 }

+size_t DefNewGeneration::tlab_used() const {
+  return eden()->used();
+}
+
 size_t DefNewGeneration::unsafe_max_tlab_alloc() const {
   return unsafe_max_alloc_nogc();
 }
--- a/src/share/vm/memory/defNewGeneration.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/memory/defNewGeneration.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -239,6 +239,7 @@
   // Thread-local allocation buffers
   bool supports_tlab_allocation() const { return true; }
   size_t tlab_capacity() const;
+  size_t tlab_used() const;
   size_t unsafe_max_tlab_alloc() const;

   // Grow the generation by the specified number of bytes.
--- a/src/share/vm/memory/freeList.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/memory/freeList.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,6 +34,7 @@

 #if INCLUDE_ALL_GCS
 #include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
+#include "gc_implementation/g1/g1CodeCacheRemSet.hpp"
 #endif // INCLUDE_ALL_GCS

 // Free list.  A FreeList is used to access a linked list of chunks
@@ -332,4 +333,5 @@
 template class FreeList<Metachunk>;
 #if INCLUDE_ALL_GCS
 template class FreeList<FreeChunk>;
+template class FreeList<G1CodeRootChunk>;
 #endif // INCLUDE_ALL_GCS
--- a/src/share/vm/memory/genCollectedHeap.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -937,6 +937,16 @@
   return result;
 }

+size_t GenCollectedHeap::tlab_used(Thread* thr) const {
+  size_t result = 0;
+  for (int i = 0; i < _n_gens; i += 1) {
+    if (_gens[i]->supports_tlab_allocation()) {
+      result += _gens[i]->tlab_used();
+    }
+  }
+  return result;
+}
+
 size_t GenCollectedHeap::unsafe_max_tlab_alloc(Thread* thr) const {
   size_t result = 0;
   for (int i = 0; i < _n_gens; i += 1) {
--- a/src/share/vm/memory/genCollectedHeap.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -256,6 +256,7 @@
   // Section on TLAB's.
   virtual bool supports_tlab_allocation() const;
   virtual size_t tlab_capacity(Thread* thr) const;
+  virtual size_t tlab_used(Thread* thr) const;
   virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;
   virtual HeapWord* allocate_new_tlab(size_t size);
--- a/src/share/vm/memory/generation.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/memory/generation.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -299,6 +299,10 @@
     guarantee(false, "Generation doesn't support thread local allocation buffers");
     return 0;
   }
+  virtual size_t tlab_used() const {
+    guarantee(false, "Generation doesn't support thread local allocation buffers");
+    return 0;
+  }
   virtual size_t unsafe_max_tlab_alloc() const {
     guarantee(false, "Generation doesn't support thread local allocation buffers");
     return 0;
--- a/src/share/vm/memory/metaspace.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/memory/metaspace.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -32,7 +32,9 @@
 #include "memory/gcLocker.hpp"
 #include "memory/metachunk.hpp"
 #include "memory/metaspace.hpp"
+#include "memory/metaspaceGCThresholdUpdater.hpp"
 #include "memory/metaspaceShared.hpp"
+#include "memory/metaspaceTracer.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/universe.hpp"
 #include "runtime/atomic.inline.hpp"
@@ -57,6 +59,7 @@
 MetaWord* last_allocated = 0;

 size_t Metaspace::_compressed_class_space_size;
+const MetaspaceTracer* Metaspace::_tracer = NULL;

 // Used in declarations in SpaceManager and ChunkManager
 enum ChunkIndex {
@@ -182,6 +185,48 @@
   // Remove from a list by size.  Selects list based on size of chunk.
   Metachunk* free_chunks_get(size_t chunk_word_size);

+#define index_bounds_check(index)                                         \
+  assert(index == SpecializedIndex ||                                     \
+         index == SmallIndex ||                                           \
+         index == MediumIndex ||                                          \
+         index == HumongousIndex, err_msg("Bad index: %d", (int) index))
+
+  size_t num_free_chunks(ChunkIndex index) const {
+    index_bounds_check(index);
+
+    if (index == HumongousIndex) {
+      return _humongous_dictionary.total_free_blocks();
+    }
+
+    ssize_t count = _free_chunks[index].count();
+    return count == -1 ? 0 : (size_t) count;
+  }
+
+  size_t size_free_chunks_in_bytes(ChunkIndex index) const {
+    index_bounds_check(index);
+
+    size_t word_size = 0;
+    if (index == HumongousIndex) {
+      word_size = _humongous_dictionary.total_size();
+    } else {
+      const size_t size_per_chunk_in_words = _free_chunks[index].size();
+      word_size = size_per_chunk_in_words * num_free_chunks(index);
+    }
+
+    return word_size * BytesPerWord;
+  }
+
+  MetaspaceChunkFreeListSummary chunk_free_list_summary() const {
+    return MetaspaceChunkFreeListSummary(num_free_chunks(SpecializedIndex),
+                                         num_free_chunks(SmallIndex),
+                                         num_free_chunks(MediumIndex),
+                                         num_free_chunks(HumongousIndex),
+                                         size_free_chunks_in_bytes(SpecializedIndex),
+                                         size_free_chunks_in_bytes(SmallIndex),
+                                         size_free_chunks_in_bytes(MediumIndex),
+                                         size_free_chunks_in_bytes(HumongousIndex));
+  }
+
   // Debug support
   void verify();
   void slow_verify() {
@@ -1436,19 +1481,21 @@
     expand_bytes = align_size_up(expand_bytes, Metaspace::commit_alignment());
     // Don't expand unless it's significant
     if (expand_bytes >= MinMetaspaceExpansion) {
-      MetaspaceGC::inc_capacity_until_GC(expand_bytes);
-    }
-    if (PrintGCDetails && Verbose) {
-      size_t new_capacity_until_GC = capacity_until_GC;
-      gclog_or_tty->print_cr("    expanding:"
-                    "  minimum_desired_capacity: %6.1fKB"
-                    "  expand_bytes: %6.1fKB"
-                    "  MinMetaspaceExpansion: %6.1fKB"
-                    "  new metaspace HWM:  %6.1fKB",
-                    minimum_desired_capacity / (double) K,
-                    expand_bytes / (double) K,
-                    MinMetaspaceExpansion / (double) K,
-                    new_capacity_until_GC / (double) K);
+      size_t new_capacity_until_GC = MetaspaceGC::inc_capacity_until_GC(expand_bytes);
+      Metaspace::tracer()->report_gc_threshold(capacity_until_GC,
+                                               new_capacity_until_GC,
+                                               MetaspaceGCThresholdUpdater::ComputeNewSize);
+      if (PrintGCDetails && Verbose) {
+        gclog_or_tty->print_cr("    expanding:"
+                      "  minimum_desired_capacity: %6.1fKB"
+                      "  expand_bytes: %6.1fKB"
+                      "  MinMetaspaceExpansion: %6.1fKB"
+                      "  new metaspace HWM:  %6.1fKB",
+                      minimum_desired_capacity / (double) K,
+                      expand_bytes / (double) K,
+                      MinMetaspaceExpansion / (double) K,
+                      new_capacity_until_GC / (double) K);
+      }
     }
     return;
   }
@@ -1527,7 +1574,10 @@
   // Don't shrink unless it's significant
   if (shrink_bytes >= MinMetaspaceExpansion &&
       ((capacity_until_GC - shrink_bytes) >= MetaspaceSize)) {
-    MetaspaceGC::dec_capacity_until_GC(shrink_bytes);
+    size_t new_capacity_until_GC = MetaspaceGC::dec_capacity_until_GC(shrink_bytes);
+    Metaspace::tracer()->report_gc_threshold(capacity_until_GC,
+                                             new_capacity_until_GC,
+                                             MetaspaceGCThresholdUpdater::ComputeNewSize);
   }
 }

@@ -2628,6 +2678,19 @@
   return free_chunks_total_words() * BytesPerWord;
 }

+bool MetaspaceAux::has_chunk_free_list(Metaspace::MetadataType mdtype) {
+  return Metaspace::get_chunk_manager(mdtype) != NULL;
+}
+
+MetaspaceChunkFreeListSummary MetaspaceAux::chunk_free_list_summary(Metaspace::MetadataType mdtype) {
+  if (!has_chunk_free_list(mdtype)) {
+    return MetaspaceChunkFreeListSummary();
+  }
+
+  const ChunkManager* cm = Metaspace::get_chunk_manager(mdtype);
+  return cm->chunk_free_list_summary();
+}
+
 void MetaspaceAux::print_metaspace_change(size_t prev_metadata_used) {
   gclog_or_tty->print(", [Metaspace:");
   if (PrintGCDetails && Verbose) {
@@ -3131,6 +3194,7 @@
   }

   MetaspaceGC::initialize();
+  _tracer = new MetaspaceTracer();
 }

 Metachunk* Metaspace::get_initialization_chunk(MetadataType mdtype,
@@ -3219,8 +3283,12 @@
   assert(delta_bytes > 0, "Must be");

   size_t after_inc = MetaspaceGC::inc_capacity_until_GC(delta_bytes);
+
+  // capacity_until_GC might be updated concurrently, must calculate previous value.
   size_t before_inc = after_inc - delta_bytes;

+  tracer()->report_gc_threshold(before_inc, after_inc,
+                                MetaspaceGCThresholdUpdater::ExpandAndAllocate);
   if (PrintGCDetails && Verbose) {
     gclog_or_tty->print_cr("Increase capacity to GC from " SIZE_FORMAT
         " to " SIZE_FORMAT, before_inc, after_inc);
@@ -3344,6 +3412,8 @@
   MetaWord* result = loader_data->metaspace_non_null()->allocate(word_size, mdtype);

   if (result == NULL) {
+    tracer()->report_metaspace_allocation_failure(loader_data, word_size, type, mdtype);
+
     // Allocation failed.
     if (is_init_completed()) {
       // Only start a GC if the bootstrapping has completed.
@@ -3355,7 +3425,7 @@
   }

   if (result == NULL) {
-    report_metadata_oome(loader_data, word_size, mdtype, CHECK_NULL);
+    report_metadata_oome(loader_data, word_size, type, mdtype, CHECK_NULL);
   }

   // Zero initialize.
@@ -3369,7 +3439,9 @@
   return class_vsm()->calc_chunk_size(word_size);
 }

-void Metaspace::report_metadata_oome(ClassLoaderData* loader_data, size_t word_size, MetadataType mdtype, TRAPS) {
+void Metaspace::report_metadata_oome(ClassLoaderData* loader_data, size_t word_size, MetaspaceObj::Type type, MetadataType mdtype, TRAPS) {
+  tracer()->report_metadata_oom(loader_data, word_size, type, mdtype);
+
   // If result is still null, we are out of memory.
   if (Verbose && TraceMetadataChunkAllocation) {
     gclog_or_tty->print_cr("Metaspace allocation failed for size "
@@ -3412,6 +3484,16 @@
   }
 }

+const char* Metaspace::metadata_type_name(Metaspace::MetadataType mdtype) {
+  switch (mdtype) {
+    case Metaspace::ClassType: return "Class";
+    case Metaspace::NonClassType: return "Metadata";
+    default:
+      assert(false, err_msg("Got bad mdtype: %d", (int) mdtype));
+      return NULL;
+  }
+}
+
 void Metaspace::record_allocation(void* ptr, MetaspaceObj::Type type, size_t word_size) {
   assert(DumpSharedSpaces, "sanity");
--- a/src/share/vm/memory/metaspace.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/memory/metaspace.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -26,6 +26,7 @@

 #include "memory/allocation.hpp"
 #include "memory/memRegion.hpp"
+#include "memory/metaspaceChunkFreeListSummary.hpp"
 #include "runtime/virtualspace.hpp"
 #include "utilities/exceptions.hpp"

@@ -60,6 +61,7 @@
 class ClassLoaderData;
 class Metablock;
 class Metachunk;
+class MetaspaceTracer;
 class MetaWord;
 class Mutex;
 class outputStream;
@@ -149,6 +151,8 @@
   static ChunkManager* _chunk_manager_metadata;
   static ChunkManager* _chunk_manager_class;

+  static const MetaspaceTracer* _tracer;
+
  public:
   static VirtualSpaceList* space_list()       { return _space_list; }
   static VirtualSpaceList* class_space_list() { return _class_space_list; }
@@ -164,6 +168,8 @@
     return mdtype == ClassType ? chunk_manager_class() : chunk_manager_metadata();
   }

+  static const MetaspaceTracer* tracer() { return _tracer; }
+
  private:
   // This is used by DumpSharedSpaces only, where only _vsm is used. So we will
   // maintain a single list for now.
@@ -234,7 +240,9 @@
   static void purge();

   static void report_metadata_oome(ClassLoaderData* loader_data, size_t word_size,
-                                   MetadataType mdtype, TRAPS);
+                                   MetaspaceObj::Type type, MetadataType mdtype, TRAPS);
+
+  static const char* metadata_type_name(Metaspace::MetadataType mdtype);

   void print_on(outputStream* st) const;
   // Debugging support
@@ -348,6 +356,9 @@
     return min_chunk_size_words() * BytesPerWord;
   }

+  static bool has_chunk_free_list(Metaspace::MetadataType mdtype);
+  static MetaspaceChunkFreeListSummary chunk_free_list_summary(Metaspace::MetadataType mdtype);
+
   // Print change in used metadata.
   static void print_metaspace_change(size_t prev_metadata_used);
   static void print_on(outputStream * out);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/metaspaceChunkFreeListSummary.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#ifndef SHARE_VM_MEMORY_METASPACE_CHUNK_FREE_LIST_SUMMARY_HPP
+#define SHARE_VM_MEMORY_METASPACE_CHUNK_FREE_LIST_SUMMARY_HPP
+
+#include "memory/allocation.hpp"
+
+class MetaspaceChunkFreeListSummary VALUE_OBJ_CLASS_SPEC {
+  size_t _num_specialized_chunks;
+  size_t _num_small_chunks;
+  size_t _num_medium_chunks;
+  size_t _num_humongous_chunks;
+
+  size_t _specialized_chunks_size_in_bytes;
+  size_t _small_chunks_size_in_bytes;
+  size_t _medium_chunks_size_in_bytes;
+  size_t _humongous_chunks_size_in_bytes;
+
+ public:
+  MetaspaceChunkFreeListSummary() :
+    _num_specialized_chunks(0),
+    _num_small_chunks(0),
+    _num_medium_chunks(0),
+    _num_humongous_chunks(0),
+    _specialized_chunks_size_in_bytes(0),
+    _small_chunks_size_in_bytes(0),
+    _medium_chunks_size_in_bytes(0),
+    _humongous_chunks_size_in_bytes(0)
+  {}
+
+  MetaspaceChunkFreeListSummary(size_t num_specialized_chunks,
+                                size_t num_small_chunks,
+                                size_t num_medium_chunks,
+                                size_t num_humongous_chunks,
+                                size_t specialized_chunks_size_in_bytes,
+                                size_t small_chunks_size_in_bytes,
+                                size_t medium_chunks_size_in_bytes,
+                                size_t humongous_chunks_size_in_bytes) :
+    _num_specialized_chunks(num_specialized_chunks),
+    _num_small_chunks(num_small_chunks),
+    _num_medium_chunks(num_medium_chunks),
+    _num_humongous_chunks(num_humongous_chunks),
+    _specialized_chunks_size_in_bytes(specialized_chunks_size_in_bytes),
+    _small_chunks_size_in_bytes(small_chunks_size_in_bytes),
+    _medium_chunks_size_in_bytes(medium_chunks_size_in_bytes),
+    _humongous_chunks_size_in_bytes(humongous_chunks_size_in_bytes)
+  {}
+
+  size_t num_specialized_chunks() const {
+    return _num_specialized_chunks;
+  }
+
+  size_t num_small_chunks() const {
+    return _num_small_chunks;
+  }
+
+  size_t num_medium_chunks() const {
+    return _num_medium_chunks;
+  }
+
+  size_t num_humongous_chunks() const {
+    return _num_humongous_chunks;
+  }
+
+  size_t specialized_chunks_size_in_bytes() const {
+    return _specialized_chunks_size_in_bytes;
+  }
+
+  size_t small_chunks_size_in_bytes() const {
+    return _small_chunks_size_in_bytes;
+  }
+
+  size_t medium_chunks_size_in_bytes() const {
+    return _medium_chunks_size_in_bytes;
+  }
+
+  size_t humongous_chunks_size_in_bytes() const {
+    return _humongous_chunks_size_in_bytes;
+  }
+};
+
+#endif // SHARE_VM_MEMORY_METASPACE_CHUNK_FREE_LIST_SUMMARY_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/metaspaceGCThresholdUpdater.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_MEMORY_METASPACE_GC_THRESHOLD_UPDATER_HPP
+#define SHARE_VM_MEMORY_METASPACE_GC_THRESHOLD_UPDATER_HPP
+
+#include "memory/allocation.hpp"
+#include "utilities/debug.hpp"
+
+class MetaspaceGCThresholdUpdater : public AllStatic {
+ public:
+  enum Type {
+    ComputeNewSize,
+    ExpandAndAllocate,
+    Last
+  };
+
+  static const char* to_string(MetaspaceGCThresholdUpdater::Type updater) {
+    switch (updater) {
+      case ComputeNewSize:
+        return "compute_new_size";
+      case ExpandAndAllocate:
+        return "expand_and_allocate";
+      default:
+        assert(false, err_msg("Got bad updater: %d", (int) updater));
+        return NULL;
+    };
+  }
+};
+
+#endif // SHARE_VM_MEMORY_METASPACE_GC_THRESHOLD_UPDATER_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/metaspaceTracer.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "classfile/classLoaderData.hpp"
+#include "memory/metaspaceTracer.hpp"
+#include "oops/oop.inline.hpp"
+#include "trace/tracing.hpp"
+#include "trace/traceBackend.hpp"
+
+void MetaspaceTracer::report_gc_threshold(size_t old_val,
+                                          size_t new_val,
+                                          MetaspaceGCThresholdUpdater::Type updater) const {
+  EventMetaspaceGCThreshold event;
+  if (event.should_commit()) {
+    event.set_oldValue(old_val);
+    event.set_newValue(new_val);
+    event.set_updater((u1)updater);
+    event.commit();
+  }
+}
+
+void MetaspaceTracer::report_metaspace_allocation_failure(ClassLoaderData *cld,
+                                                          size_t word_size,
+                                                          MetaspaceObj::Type objtype,
+                                                          Metaspace::MetadataType mdtype) const {
+  send_allocation_failure_event<EventMetaspaceAllocationFailure>(cld, word_size, objtype, mdtype);
+}
+
+void MetaspaceTracer::report_metadata_oom(ClassLoaderData *cld,
+                                         size_t word_size,
+                                         MetaspaceObj::Type objtype,
+                                         Metaspace::MetadataType mdtype) const {
+  send_allocation_failure_event<EventMetaspaceOOM>(cld, word_size, objtype, mdtype);
+}
+
+template <typename E>
+void MetaspaceTracer::send_allocation_failure_event(ClassLoaderData *cld,
+                                                    size_t word_size,
+                                                    MetaspaceObj::Type objtype,
+                                                    Metaspace::MetadataType mdtype) const {
+  E event;
+  if (event.should_commit()) {
+    if (cld->is_anonymous()) {
+      event.set_classLoader(NULL);
+      event.set_anonymousClassLoader(true);
+    } else {
+      if (cld->is_the_null_class_loader_data()) {
+        event.set_classLoader((Klass*) NULL);
+      } else {
+        event.set_classLoader(cld->class_loader()->klass());
+      }
+      event.set_anonymousClassLoader(false);
+    }
+
+    event.set_size(word_size * BytesPerWord);
+    event.set_metadataType((u1) mdtype);
+    event.set_metaspaceObjectType((u1) objtype);
+    event.commit();
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/metaspaceTracer.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_MEMORY_METASPACE_TRACER_HPP
+#define SHARE_VM_MEMORY_METASPACE_TRACER_HPP
+
+#include "memory/allocation.hpp"
+#include "memory/metaspace.hpp"
+#include "memory/metaspaceGCThresholdUpdater.hpp"
+
+class ClassLoaderData;
+
+class MetaspaceTracer : public CHeapObj<mtTracing> {
+  template <typename E>
+  void send_allocation_failure_event(ClassLoaderData *cld,
+                                     size_t word_size,
+                                     MetaspaceObj::Type objtype,
+                                     Metaspace::MetadataType mdtype) const;
+ public:
+  void report_gc_threshold(size_t old_val,
+                           size_t new_val,
+                           MetaspaceGCThresholdUpdater::Type updater) const;
+  void report_metaspace_allocation_failure(ClassLoaderData *cld,
+                                           size_t word_size,
+                                           MetaspaceObj::Type objtype,
+                                           Metaspace::MetadataType mdtype) const;
+  void report_metadata_oom(ClassLoaderData *cld,
+                           size_t word_size,
+                           MetaspaceObj::Type objtype,
+                           Metaspace::MetadataType mdtype) const;
+
+};
+
+#endif // SHARE_VM_MEMORY_METASPACE_TRACER_HPP
--- a/src/share/vm/memory/padded.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/memory/padded.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -90,4 +90,23 @@
   static PaddedEnd<T>* create_unfreeable(uint length);
 };

+// Helper class to create an array of references to arrays of primitive types
+// Both the array of references and the data arrays are aligned to the given
+// alignment. The allocated memory is zero-filled.
+template <class T, MEMFLAGS flags, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
+class Padded2DArray {
+ public:
+  // Creates an aligned padded 2D array.
+  // The memory cannot be deleted since the raw memory chunk is not returned.
+  static T** create_unfreeable(uint rows, uint columns, size_t* allocation_size = NULL);
+};
+
+// Helper class to create an array of T objects. The array as a whole will
+// start at a multiple of alignment and its size will be aligned to alignment.
+template <class T, MEMFLAGS flags, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
+class PaddedPrimitiveArray {
+ public:
+  static T* create_unfreeable(size_t length);
+};
+
 #endif // SHARE_VM_MEMORY_PADDED_HPP
--- a/src/share/vm/memory/padded.inline.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/memory/padded.inline.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,3 +47,42 @@

   return aligned_padded_array;
 }
+
+template <class T, MEMFLAGS flags, size_t alignment>
+T** Padded2DArray<T, flags, alignment>::create_unfreeable(uint rows, uint columns, size_t* allocation_size) {
+  // Calculate and align the size of the first dimension's table.
+  size_t table_size = align_size_up_(rows * sizeof(T*), alignment);
+  // The size of the separate rows.
+  size_t row_size = align_size_up_(columns * sizeof(T), alignment);
+  // Total size consists of the indirection table plus the rows.
+  size_t total_size = table_size + rows * row_size + alignment;
+
+  // Allocate a chunk of memory large enough to allow alignment of the chunk.
+  void* chunk = AllocateHeap(total_size, flags);
+  // Clear the allocated memory.
+  memset(chunk, 0, total_size);
+  // Align the chunk of memory.
+  T** result = (T**)align_pointer_up(chunk, alignment);
+  void* data_start = (void*)((uintptr_t)result + table_size);
+
+  // Fill in the row table.
+  for (size_t i = 0; i < rows; i++) {
+    result[i] = (T*)((uintptr_t)data_start + i * row_size);
+  }
+
+  if (allocation_size != NULL) {
+    *allocation_size = total_size;
+  }
+
+  return result;
+}
+
+template <class T, MEMFLAGS flags, size_t alignment>
+T* PaddedPrimitiveArray<T, flags, alignment>::create_unfreeable(size_t length) {
+  // Allocate a chunk of memory large enough to allow for some alignment.
+  void* chunk = AllocateHeap(length * sizeof(T) + alignment, flags);
+
+  memset(chunk, 0, length * sizeof(T) + alignment);
+
+  return (T*)align_pointer_up(chunk, alignment);
+}
--- a/src/share/vm/memory/referenceProcessor.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/memory/referenceProcessor.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -95,12 +95,11 @@
                                        uint      mt_discovery_degree,
                                        bool      atomic_discovery,
                                        BoolObjectClosure* is_alive_non_header,
-                                       bool      discovered_list_needs_barrier)  :
+                                       bool      discovered_list_needs_post_barrier)  :
   _discovering_refs(false),
   _enqueuing_is_done(false),
   _is_alive_non_header(is_alive_non_header),
-  _discovered_list_needs_barrier(discovered_list_needs_barrier),
-  _bs(NULL),
+  _discovered_list_needs_post_barrier(discovered_list_needs_post_barrier),
   _processing_is_mt(mt_processing),
   _next_id(0)
 {
@@ -126,10 +125,6 @@
     _discovered_refs[i].set_length(0);
   }

-  // If we do barriers, cache a copy of the barrier set.
-  if (discovered_list_needs_barrier) {
-    _bs = Universe::heap()->barrier_set();
-  }
   setup_policy(false /* default soft ref policy */);
 }

@@ -317,13 +312,9 @@
   // Enqueue references that are not made active again, and
   // clear the decks for the next collection (cycle).
   ref->enqueue_discovered_reflists((HeapWord*)pending_list_addr, task_executor);
-  // Do the oop-check on pending_list_addr missed in
-  // enqueue_discovered_reflist. We should probably
-  // do a raw oop_check so that future such idempotent
-  // oop_stores relying on the oop-check side-effect
-  // may be elided automatically and safely without
-  // affecting correctness.
-  oop_store(pending_list_addr, oopDesc::load_decode_heap_oop(pending_list_addr));
+  // Do the post-barrier on pending_list_addr missed in
+  // enqueue_discovered_reflist.
+  oopDesc::bs()->write_ref_field(pending_list_addr, oopDesc::load_decode_heap_oop(pending_list_addr));

   // Stop treating discovered references specially.
   ref->disable_discovery();
@@ -372,15 +363,17 @@
       assert(java_lang_ref_Reference::next(obj) == NULL,
              "Reference not active; should not be discovered");
       // Self-loop next, so as to make Ref not active.
-      java_lang_ref_Reference::set_next(obj, obj);
+      // Post-barrier not needed when looping to self.
+      java_lang_ref_Reference::set_next_raw(obj, obj);
       if (next_d == obj) {  // obj is last
         // Swap refs_list into pendling_list_addr and
         // set obj's discovered to what we read from pending_list_addr.
         oop old = oopDesc::atomic_exchange_oop(refs_list.head(), pending_list_addr);
-        // Need oop_check on pending_list_addr above;
-        // see special oop-check code at the end of
+        // Need post-barrier on pending_list_addr above;
+        // see special post-barrier code at the end of
         // enqueue_discovered_reflists() further below.
-        java_lang_ref_Reference::set_discovered(obj, old); // old may be NULL
+        java_lang_ref_Reference::set_discovered_raw(obj, old); // old may be NULL
+        oopDesc::bs()->write_ref_field(java_lang_ref_Reference::discovered_addr(obj), old);
       }
     }
   } else { // Old behaviour
@@ -497,13 +490,13 @@
   } else {
     new_next = _next;
   }
-
-  if (UseCompressedOops) {
-    // Remove Reference object from list.
-    oopDesc::encode_store_heap_oop((narrowOop*)_prev_next, new_next);
-  } else {
-    // Remove Reference object from list.
-    oopDesc::store_heap_oop((oop*)_prev_next, new_next);
+  // Remove Reference object from discovered list. Note that G1 does not need a
+  // pre-barrier here because we know the Reference has already been found/marked,
+  // that's how it ended up in the discovered list in the first place.
+  oop_store_raw(_prev_next, new_next);
+  if (_discovered_list_needs_post_barrier && _prev_next != _refs_list.adr_head()) {
+    // Needs post-barrier and this is not the list head (which is not on the heap)
+    oopDesc::bs()->write_ref_field(_prev_next, new_next);
   }
   NOT_PRODUCT(_removed++);
   _refs_list.dec_length(1);
@@ -516,13 +509,11 @@
   // the reference object and will fail
   // CT verification.
   if (UseG1GC) {
-    BarrierSet* bs = oopDesc::bs();
     HeapWord* next_addr = java_lang_ref_Reference::next_addr(_ref);
-
     if (UseCompressedOops) {
-      bs->write_ref_field_pre((narrowOop*)next_addr, NULL);
+      oopDesc::bs()->write_ref_field_pre((narrowOop*)next_addr, NULL);
     } else {
-      bs->write_ref_field_pre((oop*)next_addr, NULL);
+      oopDesc::bs()->write_ref_field_pre((oop*)next_addr, NULL);
     }
     java_lang_ref_Reference::set_next_raw(_ref, NULL);
   } else {
@@ -553,7 +544,7 @@
                                    OopClosure*        keep_alive,
                                    VoidClosure*       complete_gc) {
   assert(policy != NULL, "Must have a non-NULL policy");
-  DiscoveredListIterator iter(refs_list, keep_alive, is_alive);
+  DiscoveredListIterator iter(refs_list, keep_alive, is_alive, _discovered_list_needs_post_barrier);
   // Decide which softly reachable refs should be kept alive.
   while (iter.has_next()) {
     iter.load_ptrs(DEBUG_ONLY(!discovery_is_atomic() /* allow_null_referent */));
@@ -593,7 +584,7 @@
                              BoolObjectClosure* is_alive,
                              OopClosure*        keep_alive) {
   assert(discovery_is_atomic(), "Error");
-  DiscoveredListIterator iter(refs_list, keep_alive, is_alive);
+  DiscoveredListIterator iter(refs_list, keep_alive, is_alive, _discovered_list_needs_post_barrier);
   while (iter.has_next()) {
     iter.load_ptrs(DEBUG_ONLY(false /* allow_null_referent */));
     DEBUG_ONLY(oop next = java_lang_ref_Reference::next(iter.obj());)
@@ -630,7 +621,7 @@
                                                   OopClosure*        keep_alive,
                                                   VoidClosure*       complete_gc) {
   assert(!discovery_is_atomic(), "Error");
-  DiscoveredListIterator iter(refs_list, keep_alive, is_alive);
+  DiscoveredListIterator iter(refs_list, keep_alive, is_alive, _discovered_list_needs_post_barrier);
   while (iter.has_next()) {
     iter.load_ptrs(DEBUG_ONLY(true /* allow_null_referent */));
     HeapWord* next_addr = java_lang_ref_Reference::next_addr(iter.obj());
@@ -673,7 +664,7 @@
                                    OopClosure*        keep_alive,
                                    VoidClosure*       complete_gc) {
   ResourceMark rm;
-  DiscoveredListIterator iter(refs_list, keep_alive, is_alive);
+  DiscoveredListIterator iter(refs_list, keep_alive, is_alive, _discovered_list_needs_post_barrier);
   while (iter.has_next()) {
     iter.update_discovered();
     iter.load_ptrs(DEBUG_ONLY(false /* allow_null_referent */));
@@ -790,10 +781,9 @@
 };

 void ReferenceProcessor::set_discovered(oop ref, oop value) {
-  if (_discovered_list_needs_barrier) {
-    java_lang_ref_Reference::set_discovered(ref, value);
-  } else {
-    java_lang_ref_Reference::set_discovered_raw(ref, value);
+  java_lang_ref_Reference::set_discovered_raw(ref, value);
+  if (_discovered_list_needs_post_barrier) {
+    oopDesc::bs()->write_ref_field(java_lang_ref_Reference::discovered_addr(ref), value);
   }
 }

@@ -990,7 +980,7 @@

 void ReferenceProcessor::clean_up_discovered_reflist(DiscoveredList& refs_list) {
   assert(!discovery_is_atomic(), "Else why call this method?");
-  DiscoveredListIterator iter(refs_list, NULL, NULL);
+  DiscoveredListIterator iter(refs_list, NULL, NULL, _discovered_list_needs_post_barrier);
   while (iter.has_next()) {
     iter.load_ptrs(DEBUG_ONLY(true /* allow_null_referent */));
     oop next = java_lang_ref_Reference::next(iter.obj());
@@ -1085,8 +1075,8 @@
   // so this will expand to nothing. As a result, we have manually
   // elided this out for G1, but left in the test for some future
   // collector that might have need for a pre-barrier here, e.g.:-
-  // _bs->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered);
-  assert(!_discovered_list_needs_barrier || UseG1GC,
+  // oopDesc::bs()->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered);
+  assert(!_discovered_list_needs_post_barrier || UseG1GC,
          "Need to check non-G1 collector: "
          "may need a pre-write-barrier for CAS from NULL below");
   oop retest = oopDesc::atomic_compare_exchange_oop(next_discovered, discovered_addr,
@@ -1097,8 +1087,8 @@
     // is necessary.
     refs_list.set_head(obj);
     refs_list.inc_length(1);
-    if (_discovered_list_needs_barrier) {
-      _bs->write_ref_field((void*)discovered_addr, next_discovered);
+    if (_discovered_list_needs_post_barrier) {
+      oopDesc::bs()->write_ref_field((void*)discovered_addr, next_discovered);
     }

     if (TraceReferenceGC) {
@@ -1250,7 +1240,7 @@
   if (_discovery_is_mt) {
     add_to_discovered_list_mt(*list, obj, discovered_addr);
   } else {
-    // If "_discovered_list_needs_barrier", we do write barriers when
+    // If "_discovered_list_needs_post_barrier", we do write barriers when
     // updating the discovered reference list.  Otherwise, we do a raw store
     // here: the field will be visited later when processing the discovered
     // references.
@@ -1260,13 +1250,13 @@

     // As in the case further above, since we are over-writing a NULL
     // pre-value, we can safely elide the pre-barrier here for the case of G1.
-    // e.g.:- _bs->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered);
+    // e.g.:- oopDesc::bs()->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered);
     assert(discovered == NULL, "control point invariant");
-    assert(!_discovered_list_needs_barrier || UseG1GC,
+    assert(!_discovered_list_needs_post_barrier || UseG1GC,
            "For non-G1 collector, may need a pre-write-barrier for CAS from NULL below");
     oop_store_raw(discovered_addr, next_discovered);
-    if (_discovered_list_needs_barrier) {
-      _bs->write_ref_field((void*)discovered_addr, next_discovered);
+    if (_discovered_list_needs_post_barrier) {
+      oopDesc::bs()->write_ref_field((void*)discovered_addr, next_discovered);
     }
     list->set_head(obj);
     list->inc_length(1);
@@ -1361,7 +1351,7 @@
                                                 OopClosure*        keep_alive,
                                                 VoidClosure*       complete_gc,
                                                 YieldClosure*      yield) {
-  DiscoveredListIterator iter(refs_list, keep_alive, is_alive);
+  DiscoveredListIterator iter(refs_list, keep_alive, is_alive, _discovered_list_needs_post_barrier);
   while (iter.has_next()) {
     iter.load_ptrs(DEBUG_ONLY(true /* allow_null_referent */));
     oop obj = iter.obj();
--- a/src/share/vm/memory/referenceProcessor.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/memory/referenceProcessor.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -99,6 +99,7 @@
   oop                _referent;
   OopClosure*        _keep_alive;
   BoolObjectClosure* _is_alive;
+  bool               _discovered_list_needs_post_barrier;

   DEBUG_ONLY(
   oop                _first_seen; // cyclic linked list check
@@ -112,7 +113,8 @@
 public:
   inline DiscoveredListIterator(DiscoveredList&    refs_list,
                                 OopClosure*        keep_alive,
-                                BoolObjectClosure* is_alive):
+                                BoolObjectClosure* is_alive,
+                                bool               discovered_list_needs_post_barrier = false):
     _refs_list(refs_list),
     _prev_next(refs_list.adr_head()),
     _prev(NULL),
@@ -126,7 +128,8 @@
 #endif
     _next(NULL),
     _keep_alive(keep_alive),
-    _is_alive(is_alive)
+    _is_alive(is_alive),
+    _discovered_list_needs_post_barrier(discovered_list_needs_post_barrier)
 { }

   // End Of List.
@@ -228,14 +231,13 @@
   bool        _discovery_is_mt;         // true if reference discovery is MT.

   // If true, setting "next" field of a discovered refs list requires
-  // write barrier(s).  (Must be true if used in a collector in which
+  // write post barrier.  (Must be true if used in a collector in which
   // elements of a discovered list may be moved during discovery: for
   // example, a collector like Garbage-First that moves objects during a
   // long-term concurrent marking phase that does weak reference
   // discovery.)
-  bool        _discovered_list_needs_barrier;
+  bool        _discovered_list_needs_post_barrier;

-  BarrierSet* _bs;                      // Cached copy of BarrierSet.
   bool        _enqueuing_is_done;       // true if all weak references enqueued
   bool        _processing_is_mt;        // true during phases when
                                         // reference processing is MT.
@@ -381,8 +383,8 @@

  protected:
   // Set the 'discovered' field of the given reference to
-  // the given value - emitting barriers depending upon
-  // the value of _discovered_list_needs_barrier.
+  // the given value - emitting post barriers depending upon
+  // the value of _discovered_list_needs_post_barrier.
   void set_discovered(oop ref, oop value);

   // "Preclean" the given discovered reference list
@@ -420,32 +422,13 @@
   void update_soft_ref_master_clock();

  public:
-  // constructor
-  ReferenceProcessor():
-    _span((HeapWord*)NULL, (HeapWord*)NULL),
-    _discovered_refs(NULL),
-    _discoveredSoftRefs(NULL),  _discoveredWeakRefs(NULL),
-    _discoveredFinalRefs(NULL), _discoveredPhantomRefs(NULL),
-    _discovering_refs(false),
-    _discovery_is_atomic(true),
-    _enqueuing_is_done(false),
-    _discovery_is_mt(false),
-    _discovered_list_needs_barrier(false),
-    _bs(NULL),
-    _is_alive_non_header(NULL),
-    _num_q(0),
-    _max_num_q(0),
-    _processing_is_mt(false),
-    _next_id(0)
-  { }
-
   // Default parameters give you a vanilla reference processor.
   ReferenceProcessor(MemRegion span,
                      bool mt_processing = false, uint mt_processing_degree = 1,
                      bool mt_discovery  = false, uint mt_discovery_degree  = 1,
                      bool atomic_discovery = true,
                      BoolObjectClosure* is_alive_non_header = NULL,
-                     bool discovered_list_needs_barrier = false);
+                     bool discovered_list_needs_post_barrier = false);

   // RefDiscoveryPolicy values
   enum DiscoveryPolicy {
--- a/src/share/vm/memory/threadLocalAllocBuffer.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/memory/threadLocalAllocBuffer.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -34,6 +34,7 @@
 // Thread-Local Edens support

 // static member initialization
+size_t           ThreadLocalAllocBuffer::_max_size       = 0;
 unsigned         ThreadLocalAllocBuffer::_target_refills = 0;
 GlobalTLABStats* ThreadLocalAllocBuffer::_global_stats   = NULL;

@@ -45,7 +46,7 @@
 void ThreadLocalAllocBuffer::accumulate_statistics_before_gc() {
   global_stats()->initialize();

-  for(JavaThread *thread = Threads::first(); thread; thread = thread->next()) {
+  for (JavaThread *thread = Threads::first(); thread != NULL; thread = thread->next()) {
     thread->tlab().accumulate_statistics();
     thread->tlab().initialize_statistics();
   }
@@ -60,28 +61,32 @@
 }

 void ThreadLocalAllocBuffer::accumulate_statistics() {
-  size_t capacity = Universe::heap()->tlab_capacity(myThread()) / HeapWordSize;
-  size_t unused   = Universe::heap()->unsafe_max_tlab_alloc(myThread()) / HeapWordSize;
-  size_t used     = capacity - unused;
-
-  // Update allocation history if a reasonable amount of eden was allocated.
-  bool update_allocation_history = used > 0.5 * capacity;
+  Thread* thread = myThread();
+  size_t capacity = Universe::heap()->tlab_capacity(thread);
+  size_t used     = Universe::heap()->tlab_used(thread);

   _gc_waste += (unsigned)remaining();
+  size_t total_allocated = thread->allocated_bytes();
+  size_t allocated_since_last_gc = total_allocated - _allocated_before_last_gc;
+  _allocated_before_last_gc = total_allocated;

   if (PrintTLAB && (_number_of_refills > 0 || Verbose)) {
     print_stats("gc");
   }

   if (_number_of_refills > 0) {
+    // Update allocation history if a reasonable amount of eden was allocated.
+    bool update_allocation_history = used > 0.5 * capacity;

     if (update_allocation_history) {
       // Average the fraction of eden allocated in a tlab by this
       // thread for use in the next resize operation.
       // _gc_waste is not subtracted because it's included in
       // "used".
-      size_t allocation = _number_of_refills * desired_size();
-      double alloc_frac = allocation / (double) used;
+      // The result can be larger than 1.0 due to direct to old allocations.
+      // These allocations should ideally not be counted but since it is not possible
+      // to filter them out here we just cap the fraction to be at most 1.0.
+      double alloc_frac = MIN2(1.0, (double) allocated_since_last_gc / used);
       _allocation_fraction.sample(alloc_frac);
     }
     global_stats()->update_allocating_threads();
@@ -126,33 +131,32 @@
 }

 void ThreadLocalAllocBuffer::resize_all_tlabs() {
-  for(JavaThread *thread = Threads::first(); thread; thread = thread->next()) {
-    thread->tlab().resize();
+  if (ResizeTLAB) {
+    for (JavaThread *thread = Threads::first(); thread != NULL; thread = thread->next()) {
+      thread->tlab().resize();
+    }
   }
 }

 void ThreadLocalAllocBuffer::resize() {
+  // Compute the next tlab size using expected allocation amount
+  assert(ResizeTLAB, "Should not call this otherwise");
+  size_t alloc = (size_t)(_allocation_fraction.average() *
+                          (Universe::heap()->tlab_capacity(myThread()) / HeapWordSize));
+  size_t new_size = alloc / _target_refills;

-  if (ResizeTLAB) {
-    // Compute the next tlab size using expected allocation amount
-    size_t alloc = (size_t)(_allocation_fraction.average() *
-                            (Universe::heap()->tlab_capacity(myThread()) / HeapWordSize));
-    size_t new_size = alloc / _target_refills;
-
-    new_size = MIN2(MAX2(new_size, min_size()), max_size());
-
-    size_t aligned_new_size = align_object_size(new_size);
+  new_size = MIN2(MAX2(new_size, min_size()), max_size());

-    if (PrintTLAB && Verbose) {
-      gclog_or_tty->print("TLAB new size: thread: " INTPTR_FORMAT " [id: %2d]"
-                          " refills %d  alloc: %8.6f desired_size: " SIZE_FORMAT " -> " SIZE_FORMAT "\n",
-                          myThread(), myThread()->osthread()->thread_id(),
-                          _target_refills, _allocation_fraction.average(), desired_size(), aligned_new_size);
-    }
-    set_desired_size(aligned_new_size);
+  size_t aligned_new_size = align_object_size(new_size);

-    set_refill_waste_limit(initial_refill_waste_limit());
+  if (PrintTLAB && Verbose) {
+    gclog_or_tty->print("TLAB new size: thread: " INTPTR_FORMAT " [id: %2d]"
+                        " refills %d  alloc: %8.6f desired_size: " SIZE_FORMAT " -> " SIZE_FORMAT "\n",
+                        myThread(), myThread()->osthread()->thread_id(),
+                        _target_refills, _allocation_fraction.average(), desired_size(), aligned_new_size);
   }
+  set_desired_size(aligned_new_size);
+  set_refill_waste_limit(initial_refill_waste_limit());
 }

 void ThreadLocalAllocBuffer::initialize_statistics() {
@@ -248,31 +252,13 @@
   return init_sz;
 }

-const size_t ThreadLocalAllocBuffer::max_size() {
-
-  // TLABs can't be bigger than we can fill with a int[Integer.MAX_VALUE].
-  // This restriction could be removed by enabling filling with multiple arrays.
-  // If we compute that the reasonable way as
-  //    header_size + ((sizeof(jint) * max_jint) / HeapWordSize)
-  // we'll overflow on the multiply, so we do the divide first.
-  // We actually lose a little by dividing first,
-  // but that just makes the TLAB  somewhat smaller than the biggest array,
-  // which is fine, since we'll be able to fill that.
-
-  size_t unaligned_max_size = typeArrayOopDesc::header_size(T_INT) +
-                              sizeof(jint) *
-                              ((juint) max_jint / (size_t) HeapWordSize);
-  return align_size_down(unaligned_max_size, MinObjAlignment);
-}
-
 void ThreadLocalAllocBuffer::print_stats(const char* tag) {
   Thread* thrd = myThread();
   size_t waste = _gc_waste + _slow_refill_waste + _fast_refill_waste;
   size_t alloc = _number_of_refills * _desired_size;
   double waste_percent = alloc == 0 ? 0.0 :
                       100.0 * waste / alloc;
-  size_t tlab_used  = Universe::heap()->tlab_capacity(thrd) -
-                      Universe::heap()->unsafe_max_tlab_alloc(thrd);
+  size_t tlab_used  = Universe::heap()->tlab_used(thrd);
   gclog_or_tty->print("TLAB: %s thread: " INTPTR_FORMAT " [id: %2d]"
                       " desired_size: " SIZE_FORMAT "KB"
                       " slow allocs: %d  refill waste: " SIZE_FORMAT "B"
--- a/src/share/vm/memory/threadLocalAllocBuffer.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/memory/threadLocalAllocBuffer.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -45,7 +45,9 @@
   HeapWord* _end;                                // allocation end (excluding alignment_reserve)
   size_t    _desired_size;                       // desired size   (including alignment_reserve)
   size_t    _refill_waste_limit;                 // hold onto tlab if free() is larger than this
+  size_t    _allocated_before_last_gc;           // total bytes allocated up until the last gc

+  static size_t   _max_size;                     // maximum size of any TLAB
   static unsigned _target_refills;               // expected number of refills between GCs

   unsigned  _number_of_refills;
@@ -99,12 +101,13 @@
   static GlobalTLABStats* global_stats() { return _global_stats; }

 public:
-  ThreadLocalAllocBuffer() : _allocation_fraction(TLABAllocationWeight) {
+  ThreadLocalAllocBuffer() : _allocation_fraction(TLABAllocationWeight), _allocated_before_last_gc(0) {
     // do nothing.  tlabs must be inited by initialize() calls
   }

   static const size_t min_size()                 { return align_object_size(MinTLABSize / HeapWordSize); }
-  static const size_t max_size();
+  static const size_t max_size()                 { assert(_max_size != 0, "max_size not set up"); return _max_size; }
+  static void set_max_size(size_t max_size)      { _max_size = max_size; }

   HeapWord* start() const                        { return _start; }
   HeapWord* end() const                          { return _end; }
--- a/src/share/vm/memory/universe.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/memory/universe.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -816,6 +816,8 @@
     Universe::_collectedHeap = new GenCollectedHeap(gc_policy);
   }

+  ThreadLocalAllocBuffer::set_max_size(Universe::heap()->max_tlab_size());
+
   jint status = Universe::heap()->initialize();
   if (status != JNI_OK) {
     return status;
--- a/src/share/vm/oops/instanceKlass.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/oops/instanceKlass.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -2238,15 +2238,7 @@
   for (int m = 0; m < methods()->length(); m++) {
     MethodData* mdo = methods()->at(m)->method_data();
     if (mdo != NULL) {
-      for (ProfileData* data = mdo->first_data();
-           mdo->is_valid(data);
-           data = mdo->next_data(data)) {
-        data->clean_weak_klass_links(is_alive);
-      }
-      ParametersTypeData* parameters = mdo->parameters_type_data();
-      if (parameters != NULL) {
-        parameters->clean_weak_klass_links(is_alive);
-      }
+      mdo->clean_method_data(is_alive);
     }
   }
 }
--- a/src/share/vm/oops/instanceKlass.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/oops/instanceKlass.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -306,7 +306,7 @@
   //   three cases:
   //     NULL: no implementor.
   //     A Klass* that's not itself: one implementor.
-  //     Itsef: more than one implementors.
+  //     Itself: more than one implementors.
   // embedded host klass follows here
   //   The embedded host klass only exists in an anonymous class for
   //   dynamic language support (JSR 292 enabled). The host class grants
@@ -554,6 +554,7 @@
     if (hk == NULL) {
       return NULL;
     } else {
+      assert(*hk != NULL, "host klass should always be set if the address is not null");
       return *hk;
     }
   }
--- a/src/share/vm/oops/metadata.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/oops/metadata.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,7 +40,7 @@
   int identity_hash()                { return (int)(uintptr_t)this; }

   // Rehashing support for tables containing pointers to this
-  unsigned int new_hash(jint seed)   { ShouldNotReachHere();  return 0; }
+  unsigned int new_hash(juint seed)   { ShouldNotReachHere();  return 0; }

   virtual bool is_klass()              const volatile { return false; }
   virtual bool is_method()             const volatile { return false; }
--- a/src/share/vm/oops/method.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/oops/method.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -273,7 +273,7 @@
 }

 address Method::bcp_from(int bci) const {
-  assert((is_native() && bci == 0)  || (!is_native() && 0 <= bci && bci < code_size()), "illegal bci");
+  assert((is_native() && bci == 0)  || (!is_native() && 0 <= bci && bci < code_size()), err_msg("illegal bci: %d", bci));
   address bcp = code_base() + bci;
   assert(is_native() && bcp == code_base() || contains(bcp), "bcp doesn't belong to this method");
   return bcp;
--- a/src/share/vm/oops/methodData.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/oops/methodData.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -24,6 +24,7 @@

 #include "precompiled.hpp"
 #include "classfile/systemDictionary.hpp"
+#include "compiler/compilerOracle.hpp"
 #include "interpreter/bytecode.hpp"
 #include "interpreter/bytecodeStream.hpp"
 #include "interpreter/linkResolver.hpp"
@@ -80,8 +81,42 @@
   _data = NULL;
 }

+char* ProfileData::print_data_on_helper(const MethodData* md) const {
+  DataLayout* dp  = md->extra_data_base();
+  DataLayout* end = md->extra_data_limit();
+  stringStream ss;
+  for (;; dp = MethodData::next_extra(dp)) {
+    assert(dp < end, "moved past end of extra data");
+    switch(dp->tag()) {
+    case DataLayout::speculative_trap_data_tag:
+      if (dp->bci() == bci()) {
+        SpeculativeTrapData* data = new SpeculativeTrapData(dp);
+        int trap = data->trap_state();
+        char buf[100];
+        ss.print("trap/");
+        data->method()->print_short_name(&ss);
+        ss.print("(%s) ", Deoptimization::format_trap_state(buf, sizeof(buf), trap));
+      }
+      break;
+    case DataLayout::bit_data_tag:
+      break;
+    case DataLayout::no_tag:
+    case DataLayout::arg_info_data_tag:
+      return ss.as_string();
+      break;
+    default:
+      fatal(err_msg("unexpected tag %d", dp->tag()));
+    }
+  }
+  return NULL;
+}
+
+void ProfileData::print_data_on(outputStream* st, const MethodData* md) const {
+  print_data_on(st, print_data_on_helper(md));
+}
+
 #ifndef PRODUCT
-void ProfileData::print_shared(outputStream* st, const char* name) const {
+void ProfileData::print_shared(outputStream* st, const char* name, const char* extra) const {
   st->print("bci: %d", bci());
   st->fill_to(tab_width_one);
   st->print("%s", name);
@@ -91,9 +126,13 @@
     char buf[100];
     st->print("trap(%s) ", Deoptimization::format_trap_state(buf, sizeof(buf), trap));
   }
+  if (extra != NULL) {
+    st->print(extra);
+  }
   int flags = data()->flags();
-  if (flags != 0)
+  if (flags != 0) {
     st->print("flags(%d) ", flags);
+  }
 }

 void ProfileData::tab(outputStream* st, bool first) const {
@@ -109,8 +148,8 @@


 #ifndef PRODUCT
-void BitData::print_data_on(outputStream* st) const {
-  print_shared(st, "BitData");
+void BitData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "BitData", extra);
 }
 #endif // !PRODUCT

@@ -120,8 +159,8 @@
 // A CounterData corresponds to a simple counter.

 #ifndef PRODUCT
-void CounterData::print_data_on(outputStream* st) const {
-  print_shared(st, "CounterData");
+void CounterData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "CounterData", extra);
   st->print_cr("count(%u)", count());
 }
 #endif // !PRODUCT
@@ -150,8 +189,8 @@
 }

 #ifndef PRODUCT
-void JumpData::print_data_on(outputStream* st) const {
-  print_shared(st, "JumpData");
+void JumpData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "JumpData", extra);
   st->print_cr("taken(%u) displacement(%d)", taken(), displacement());
 }
 #endif // !PRODUCT
@@ -332,8 +371,8 @@
   st->cr();
 }

-void CallTypeData::print_data_on(outputStream* st) const {
-  CounterData::print_data_on(st);
+void CallTypeData::print_data_on(outputStream* st, const char* extra) const {
+  CounterData::print_data_on(st, extra);
   if (has_arguments()) {
     tab(st, true);
     st->print("argument types");
@@ -346,8 +385,8 @@
   }
 }

-void VirtualCallTypeData::print_data_on(outputStream* st) const {
-  VirtualCallData::print_data_on(st);
+void VirtualCallTypeData::print_data_on(outputStream* st, const char* extra) const {
+  VirtualCallData::print_data_on(st, extra);
   if (has_arguments()) {
     tab(st, true);
     st->print("argument types");
@@ -400,12 +439,12 @@
     }
   }
 }
-void ReceiverTypeData::print_data_on(outputStream* st) const {
-  print_shared(st, "ReceiverTypeData");
+void ReceiverTypeData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "ReceiverTypeData", extra);
   print_receiver_data_on(st);
 }
-void VirtualCallData::print_data_on(outputStream* st) const {
-  print_shared(st, "VirtualCallData");
+void VirtualCallData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "VirtualCallData", extra);
   print_receiver_data_on(st);
 }
 #endif // !PRODUCT
@@ -461,8 +500,8 @@
 #endif // CC_INTERP

 #ifndef PRODUCT
-void RetData::print_data_on(outputStream* st) const {
-  print_shared(st, "RetData");
+void RetData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "RetData", extra);
   uint row;
   int entries = 0;
   for (row = 0; row < row_limit(); row++) {
@@ -496,8 +535,8 @@
 }

 #ifndef PRODUCT
-void BranchData::print_data_on(outputStream* st) const {
-  print_shared(st, "BranchData");
+void BranchData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "BranchData", extra);
   st->print_cr("taken(%u) displacement(%d)",
                taken(), displacement());
   tab(st);
@@ -570,8 +609,8 @@
 }

 #ifndef PRODUCT
-void MultiBranchData::print_data_on(outputStream* st) const {
-  print_shared(st, "MultiBranchData");
+void MultiBranchData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "MultiBranchData", extra);
   st->print_cr("default_count(%u) displacement(%d)",
                default_count(), default_displacement());
   int cases = number_of_cases();
@@ -584,8 +623,8 @@
 #endif

 #ifndef PRODUCT
-void ArgInfoData::print_data_on(outputStream* st) const {
-  print_shared(st, "ArgInfoData");
+void ArgInfoData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "ArgInfoData", extra);
   int nargs = number_of_args();
   for (int i = 0; i < nargs; i++) {
     st->print("  0x%x", arg_modified(i));
@@ -616,10 +655,17 @@
 }

 #ifndef PRODUCT
-void ParametersTypeData::print_data_on(outputStream* st) const {
-  st->print("parameter types");
+void ParametersTypeData::print_data_on(outputStream* st, const char* extra) const {
+  st->print("parameter types", extra);
   _parameters.print_data_on(st);
 }
+
+void SpeculativeTrapData::print_data_on(outputStream* st, const char* extra) const {
+  print_shared(st, "SpeculativeTrapData", extra);
+  tab(st);
+  method()->print_short_name(st);
+  st->cr();
+}
 #endif

 // ==================================================================
@@ -745,7 +791,27 @@
   return DataLayout::compute_size_in_bytes(cell_count);
 }

-int MethodData::compute_extra_data_count(int data_size, int empty_bc_count) {
+bool MethodData::is_speculative_trap_bytecode(Bytecodes::Code code) {
+  // Bytecodes for which we may use speculation
+  switch (code) {
+  case Bytecodes::_checkcast:
+  case Bytecodes::_instanceof:
+  case Bytecodes::_aastore:
+  case Bytecodes::_invokevirtual:
+  case Bytecodes::_invokeinterface:
+  case Bytecodes::_if_acmpeq:
+  case Bytecodes::_if_acmpne:
+  case Bytecodes::_invokestatic:
+#ifdef COMPILER2
+    return UseTypeSpeculation;
+#endif
+  default:
+    return false;
+  }
+  return false;
+}
+
+int MethodData::compute_extra_data_count(int data_size, int empty_bc_count, bool needs_speculative_traps) {
   if (ProfileTraps) {
     // Assume that up to 3% of BCIs with no MDP will need to allocate one.
     int extra_data_count = (uint)(empty_bc_count * 3) / 128 + 1;
@@ -756,7 +822,18 @@
       extra_data_count = one_percent_of_data;
     if (extra_data_count > empty_bc_count)
       extra_data_count = empty_bc_count;  // no need for more
-    return extra_data_count;
+
+    // Make sure we have a minimum number of extra data slots to
+    // allocate SpeculativeTrapData entries. We would want to have one
+    // entry per compilation that inlines this method and for which
+    // some type speculation assumption fails. So the room we need for
+    // the SpeculativeTrapData entries doesn't directly depend on the
+    // size of the method. Because it's hard to estimate, we reserve
+    // space for an arbitrary number of entries.
+    int spec_data_count = (needs_speculative_traps ? SpecTrapLimitExtraEntries : 0) *
+      (SpeculativeTrapData::static_cell_count() + DataLayout::header_size_in_cells());
+
+    return MAX2(extra_data_count, spec_data_count);
   } else {
     return 0;
   }
@@ -769,15 +846,17 @@
   BytecodeStream stream(method);
   Bytecodes::Code c;
   int empty_bc_count = 0;  // number of bytecodes lacking data
+  bool needs_speculative_traps = false;
   while ((c = stream.next()) >= 0) {
     int size_in_bytes = compute_data_size(&stream);
     data_size += size_in_bytes;
     if (size_in_bytes == 0)  empty_bc_count += 1;
+    needs_speculative_traps = needs_speculative_traps || is_speculative_trap_bytecode(c);
   }
   int object_size = in_bytes(data_offset()) + data_size;

   // Add some extra DataLayout cells (at least one) to track stray traps.
-  int extra_data_count = compute_extra_data_count(data_size, empty_bc_count);
+  int extra_data_count = compute_extra_data_count(data_size, empty_bc_count, needs_speculative_traps);
   object_size += extra_data_count * DataLayout::compute_size_in_bytes(0);

   // Add a cell to record information about modified arguments.
@@ -993,7 +1072,8 @@
 }

 // Initialize the MethodData* corresponding to a given method.
-MethodData::MethodData(methodHandle method, int size, TRAPS) {
+MethodData::MethodData(methodHandle method, int size, TRAPS)
+  : _extra_data_lock(Monitor::leaf, "MDO extra data lock") {
   No_Safepoint_Verifier no_safepoint;  // init function atomic wrt GC
   ResourceMark rm;
   // Set the method back-pointer.
@@ -1009,18 +1089,23 @@
   _data[0] = 0;  // apparently not set below.
   BytecodeStream stream(method);
   Bytecodes::Code c;
+  bool needs_speculative_traps = false;
   while ((c = stream.next()) >= 0) {
     int size_in_bytes = initialize_data(&stream, data_size);
     data_size += size_in_bytes;
     if (size_in_bytes == 0)  empty_bc_count += 1;
+    needs_speculative_traps = needs_speculative_traps || is_speculative_trap_bytecode(c);
   }
   _data_size = data_size;
   int object_size = in_bytes(data_offset()) + data_size;

   // Add some extra DataLayout cells (at least one) to track stray traps.
-  int extra_data_count = compute_extra_data_count(data_size, empty_bc_count);
+  int extra_data_count = compute_extra_data_count(data_size, empty_bc_count, needs_speculative_traps);
   int extra_size = extra_data_count * DataLayout::compute_size_in_bytes(0);

+  // Let's zero the space for the extra data
+  Copy::zero_to_bytes(((address)_data) + data_size, extra_size);
+
   // Add a cell to record information about modified arguments.
   // Set up _args_modified array after traps cells so that
   // the code for traps cells works.
@@ -1032,17 +1117,17 @@
   int arg_data_size = DataLayout::compute_size_in_bytes(arg_size+1);
   object_size += extra_size + arg_data_size;

-  int args_cell = ParametersTypeData::compute_cell_count(method());
+  int parms_cell = ParametersTypeData::compute_cell_count(method());
   // If we are profiling parameters, we reserver an area near the end
   // of the MDO after the slots for bytecodes (because there's no bci
   // for method entry so they don't fit with the framework for the
   // profiling of bytecodes). We store the offset within the MDO of
   // this area (or -1 if no parameter is profiled)
-  if (args_cell > 0) {
-    object_size += DataLayout::compute_size_in_bytes(args_cell);
+  if (parms_cell > 0) {
+    object_size += DataLayout::compute_size_in_bytes(parms_cell);
     _parameters_type_data_di = data_size + extra_size + arg_data_size;
     DataLayout *dp = data_layout_at(data_size + extra_size + arg_data_size);
-    dp->initialize(DataLayout::parameters_type_data_tag, 0, args_cell);
+    dp->initialize(DataLayout::parameters_type_data_tag, 0, parms_cell);
   } else {
     _parameters_type_data_di = -1;
   }
@@ -1069,6 +1154,21 @@
   _highest_osr_comp_level = 0;
   _would_profile = true;

+#if INCLUDE_RTM_OPT
+  _rtm_state = NoRTM; // No RTM lock eliding by default
+  if (UseRTMLocking &&
+      !CompilerOracle::has_option_string(_method, "NoRTMLockEliding")) {
+    if (CompilerOracle::has_option_string(_method, "UseRTMLockEliding") || !UseRTMDeopt) {
+      // Generate RTM lock eliding code without abort ratio calculation code.
+      _rtm_state = UseRTM;
+    } else if (UseRTMDeopt) {
+      // Generate RTM lock eliding code and include abort ratio calculation
+      // code if UseRTMDeopt is on.
+      _rtm_state = ProfileRTM;
+    }
+  }
+#endif
+
   // Initialize flags and trap history.
   _nof_decompiles = 0;
   _nof_overflow_recompiles = 0;
@@ -1133,39 +1233,114 @@
       break;
     }
   }
-  return bci_to_extra_data(bci, false);
+  return bci_to_extra_data(bci, NULL, false);
 }

-// Translate a bci to its corresponding extra data, or NULL.
-ProfileData* MethodData::bci_to_extra_data(int bci, bool create_if_missing) {
-  DataLayout* dp    = extra_data_base();
-  DataLayout* end   = extra_data_limit();
-  DataLayout* avail = NULL;
-  for (; dp < end; dp = next_extra(dp)) {
+DataLayout* MethodData::next_extra(DataLayout* dp) {
+  int nb_cells = 0;
+  switch(dp->tag()) {
+  case DataLayout::bit_data_tag:
+  case DataLayout::no_tag:
+    nb_cells = BitData::static_cell_count();
+    break;
+  case DataLayout::speculative_trap_data_tag:
+    nb_cells = SpeculativeTrapData::static_cell_count();
+    break;
+  default:
+    fatal(err_msg("unexpected tag %d", dp->tag()));
+  }
+  return (DataLayout*)((address)dp + DataLayout::compute_size_in_bytes(nb_cells));
+}
+
+ProfileData* MethodData::bci_to_extra_data_helper(int bci, Method* m, DataLayout*& dp, bool concurrent) {
+  DataLayout* end = extra_data_limit();
+
+  for (;; dp = next_extra(dp)) {
+    assert(dp < end, "moved past end of extra data");
     // No need for "OrderAccess::load_acquire" ops,
     // since the data structure is monotonic.
-    if (dp->tag() == DataLayout::no_tag)  break;
-    if (dp->tag() == DataLayout::arg_info_data_tag) {
-      dp = end; // ArgInfoData is at the end of extra data section.
+    switch(dp->tag()) {
+    case DataLayout::no_tag:
+      return NULL;
+    case DataLayout::arg_info_data_tag:
+      dp = end;
+      return NULL; // ArgInfoData is at the end of extra data section.
+    case DataLayout::bit_data_tag:
+      if (m == NULL && dp->bci() == bci) {
+        return new BitData(dp);
+      }
       break;
-    }
-    if (dp->bci() == bci) {
-      assert(dp->tag() == DataLayout::bit_data_tag, "sane");
-      return new BitData(dp);
+    case DataLayout::speculative_trap_data_tag:
+      if (m != NULL) {
+        SpeculativeTrapData* data = new SpeculativeTrapData(dp);
+        // data->method() may be null in case of a concurrent
+        // allocation. Maybe it's for the same method. Try to use that
+        // entry in that case.
+        if (dp->bci() == bci) {
+          if (data->method() == NULL) {
+            assert(concurrent, "impossible because no concurrent allocation");
+            return NULL;
+          } else if (data->method() == m) {
+            return data;
+          }
+        }
+      }
+      break;
+    default:
+      fatal(err_msg("unexpected tag %d", dp->tag()));
     }
   }
+  return NULL;
+}
+
+
+// Translate a bci to its corresponding extra data, or NULL.
+ProfileData* MethodData::bci_to_extra_data(int bci, Method* m, bool create_if_missing) {
+  // This code assumes an entry for a SpeculativeTrapData is 2 cells
+  assert(2*DataLayout::compute_size_in_bytes(BitData::static_cell_count()) ==
+         DataLayout::compute_size_in_bytes(SpeculativeTrapData::static_cell_count()),
+         "code needs to be adjusted");
+
+  DataLayout* dp  = extra_data_base();
+  DataLayout* end = extra_data_limit();
+
+  // Allocation in the extra data space has to be atomic because not
+  // all entries have the same size and non atomic concurrent
+  // allocation would result in a corrupted extra data space.
+  ProfileData* result = bci_to_extra_data_helper(bci, m, dp, true);
+  if (result != NULL) {
+    return result;
+  }
+
   if (create_if_missing && dp < end) {
-    // Allocate this one.  There is no mutual exclusion,
-    // so two threads could allocate different BCIs to the
-    // same data layout.  This means these extra data
-    // records, like most other MDO contents, must not be
-    // trusted too much.
+    MutexLocker ml(&_extra_data_lock);
+    // Check again now that we have the lock. Another thread may
+    // have added extra data entries.
+    ProfileData* result = bci_to_extra_data_helper(bci, m, dp, false);
+    if (result != NULL || dp >= end) {
+      return result;
+    }
+
+    assert(dp->tag() == DataLayout::no_tag || (dp->tag() == DataLayout::speculative_trap_data_tag && m != NULL), "should be free");
+    assert(next_extra(dp)->tag() == DataLayout::no_tag || next_extra(dp)->tag() == DataLayout::arg_info_data_tag, "should be free or arg info");
+    u1 tag = m == NULL ? DataLayout::bit_data_tag : DataLayout::speculative_trap_data_tag;
+    // SpeculativeTrapData is 2 slots. Make sure we have room.
+    if (m != NULL && next_extra(dp)->tag() != DataLayout::no_tag) {
+      return NULL;
+    }
     DataLayout temp;
-    temp.initialize(DataLayout::bit_data_tag, bci, 0);
-    dp->release_set_header(temp.header());
-    assert(dp->tag() == DataLayout::bit_data_tag, "sane");
-    //NO: assert(dp->bci() == bci, "no concurrent allocation");
-    return new BitData(dp);
+    temp.initialize(tag, bci, 0);
+
+    dp->set_header(temp.header());
+    assert(dp->tag() == tag, "sane");
+    assert(dp->bci() == bci, "no concurrent allocation");
+    if (tag == DataLayout::bit_data_tag) {
+      return new BitData(dp);
+    } else {
+      SpeculativeTrapData* data = new SpeculativeTrapData(dp);
+      data->set_method(m);
+      return data;
+    }
   }
   return NULL;
 }
@@ -1210,25 +1385,35 @@
   for ( ; is_valid(data); data = next_data(data)) {
     st->print("%d", dp_to_di(data->dp()));
     st->fill_to(6);
-    data->print_data_on(st);
+    data->print_data_on(st, this);
   }
   st->print_cr("--- Extra data:");
   DataLayout* dp    = extra_data_base();
   DataLayout* end   = extra_data_limit();
-  for (; dp < end; dp = next_extra(dp)) {
+  for (;; dp = next_extra(dp)) {
+    assert(dp < end, "moved past end of extra data");
     // No need for "OrderAccess::load_acquire" ops,
     // since the data structure is monotonic.
-    if (dp->tag() == DataLayout::no_tag)  continue;
-    if (dp->tag() == DataLayout::bit_data_tag) {
+    switch(dp->tag()) {
+    case DataLayout::no_tag:
+      continue;
+    case DataLayout::bit_data_tag:
       data = new BitData(dp);
-    } else {
-      assert(dp->tag() == DataLayout::arg_info_data_tag, "must be BitData or ArgInfo");
+      break;
+    case DataLayout::speculative_trap_data_tag:
+      data = new SpeculativeTrapData(dp);
+      break;
+    case DataLayout::arg_info_data_tag:
       data = new ArgInfoData(dp);
       dp = end; // ArgInfoData is at the end of extra data section.
+      break;
+    default:
+      fatal(err_msg("unexpected tag %d", dp->tag()));
     }
     st->print("%d", dp_to_di(data->dp()));
     st->fill_to(6);
     data->print_data_on(st);
+    if (dp >= end) return;
   }
 }
 #endif
@@ -1351,3 +1536,110 @@
   assert(profile_parameters_jsr292_only(), "inconsistent");
   return m->is_compiled_lambda_form();
 }
+
+void MethodData::clean_extra_data_helper(DataLayout* dp, int shift, bool reset) {
+  if (shift == 0) {
+    return;
+  }
+  if (!reset) {
+    // Move all cells of trap entry at dp left by "shift" cells
+    intptr_t* start = (intptr_t*)dp;
+    intptr_t* end = (intptr_t*)next_extra(dp);
+    for (intptr_t* ptr = start; ptr < end; ptr++) {
+      *(ptr-shift) = *ptr;
+    }
+  } else {
+    // Reset "shift" cells stopping at dp
+    intptr_t* start = ((intptr_t*)dp) - shift;
+    intptr_t* end = (intptr_t*)dp;
+    for (intptr_t* ptr = start; ptr < end; ptr++) {
+      *ptr = 0;
+    }
+  }
+}
+
+// Remove SpeculativeTrapData entries that reference an unloaded
+// method
+void MethodData::clean_extra_data(BoolObjectClosure* is_alive) {
+  DataLayout* dp  = extra_data_base();
+  DataLayout* end = extra_data_limit();
+
+  int shift = 0;
+  for (; dp < end; dp = next_extra(dp)) {
+    switch(dp->tag()) {
+    case DataLayout::speculative_trap_data_tag: {
+      SpeculativeTrapData* data = new SpeculativeTrapData(dp);
+      Method* m = data->method();
+      assert(m != NULL, "should have a method");
+      if (!m->method_holder()->is_loader_alive(is_alive)) {
+        // "shift" accumulates the number of cells for dead
+        // SpeculativeTrapData entries that have been seen so
+        // far. Following entries must be shifted left by that many
+        // cells to remove the dead SpeculativeTrapData entries.
+        shift += (int)((intptr_t*)next_extra(dp) - (intptr_t*)dp);
+      } else {
+        // Shift this entry left if it follows dead
+        // SpeculativeTrapData entries
+        clean_extra_data_helper(dp, shift);
+      }
+      break;
+    }
+    case DataLayout::bit_data_tag:
+      // Shift this entry left if it follows dead SpeculativeTrapData
+      // entries
+      clean_extra_data_helper(dp, shift);
+      continue;
+    case DataLayout::no_tag:
+    case DataLayout::arg_info_data_tag:
+      // We are at end of the live trap entries. The previous "shift"
+      // cells contain entries that are either dead or were shifted
+      // left. They need to be reset to no_tag
+      clean_extra_data_helper(dp, shift, true);
+      return;
+    default:
+      fatal(err_msg("unexpected tag %d", dp->tag()));
+    }
+  }
+}
+
+// Verify there's no unloaded method referenced by a
+// SpeculativeTrapData entry
+void MethodData::verify_extra_data_clean(BoolObjectClosure* is_alive) {
+#ifdef ASSERT
+  DataLayout* dp  = extra_data_base();
+  DataLayout* end = extra_data_limit();
+
+  for (; dp < end; dp = next_extra(dp)) {
+    switch(dp->tag()) {
+    case DataLayout::speculative_trap_data_tag: {
+      SpeculativeTrapData* data = new SpeculativeTrapData(dp);
+      Method* m = data->method();
+      assert(m != NULL && m->method_holder()->is_loader_alive(is_alive), "Method should exist");
+      break;
+    }
+    case DataLayout::bit_data_tag:
+      continue;
+    case DataLayout::no_tag:
+    case DataLayout::arg_info_data_tag:
+      return;
+    default:
+      fatal(err_msg("unexpected tag %d", dp->tag()));
+    }
+  }
+#endif
+}
+
+void MethodData::clean_method_data(BoolObjectClosure* is_alive) {
+  for (ProfileData* data = first_data();
+       is_valid(data);
+       data = next_data(data)) {
+    data->clean_weak_klass_links(is_alive);
+  }
+  ParametersTypeData* parameters = parameters_type_data();
+  if (parameters != NULL) {
+    parameters->clean_weak_klass_links(is_alive);
+  }
+
+  clean_extra_data(is_alive);
+  verify_extra_data_clean(is_alive);
+}
--- a/src/share/vm/oops/methodData.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/oops/methodData.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -120,7 +120,8 @@
     arg_info_data_tag,
     call_type_data_tag,
     virtual_call_type_data_tag,
-    parameters_type_data_tag
+    parameters_type_data_tag,
+    speculative_trap_data_tag
   };

   enum {
@@ -189,9 +190,6 @@
   void set_header(intptr_t value) {
     _header._bits = value;
   }
-  void release_set_header(intptr_t value) {
-    OrderAccess::release_store_ptr(&_header._bits, value);
-  }
   intptr_t header() {
     return _header._bits;
   }
@@ -271,6 +269,7 @@
 class     MultiBranchData;
 class     ArgInfoData;
 class     ParametersTypeData;
+class   SpeculativeTrapData;

 // ProfileData
 //
@@ -291,6 +290,8 @@
   // This is a pointer to a section of profiling data.
   DataLayout* _data;

+  char* print_data_on_helper(const MethodData* md) const;
+
 protected:
   DataLayout* data() { return _data; }
   const DataLayout* data() const { return _data; }
@@ -440,6 +441,7 @@
   virtual bool is_CallTypeData()    const { return false; }
   virtual bool is_VirtualCallTypeData()const { return false; }
   virtual bool is_ParametersTypeData() const { return false; }
+  virtual bool is_SpeculativeTrapData()const { return false; }


   BitData* as_BitData() const {
@@ -494,6 +496,10 @@
     assert(is_ParametersTypeData(), "wrong type");
     return is_ParametersTypeData() ? (ParametersTypeData*)this : NULL;
   }
+  SpeculativeTrapData* as_SpeculativeTrapData() const {
+    assert(is_SpeculativeTrapData(), "wrong type");
+    return is_SpeculativeTrapData() ? (SpeculativeTrapData*)this : NULL;
+  }


   // Subclass specific initialization
@@ -509,12 +515,14 @@
   // translation here, and the required translators are in the ci subclasses.
   virtual void translate_from(const ProfileData* data) {}

-  virtual void print_data_on(outputStream* st) const {
+  virtual void print_data_on(outputStream* st, const char* extra = NULL) const {
     ShouldNotReachHere();
   }

+  void print_data_on(outputStream* st, const MethodData* md) const;
+
 #ifndef PRODUCT
-  void print_shared(outputStream* st, const char* name) const;
+  void print_shared(outputStream* st, const char* name, const char* extra) const;
   void tab(outputStream* st, bool first = false) const;
 #endif
 };
@@ -576,7 +584,7 @@
 #endif // CC_INTERP

 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };

@@ -639,7 +647,7 @@
 #endif // CC_INTERP

 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };

@@ -726,7 +734,7 @@
   void post_initialize(BytecodeStream* stream, MethodData* mdo);

 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };

@@ -1137,7 +1145,7 @@
   }

 #ifndef PRODUCT
-  virtual void print_data_on(outputStream* st) const;
+  virtual void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };

@@ -1282,7 +1290,7 @@

 #ifndef PRODUCT
   void print_receiver_data_on(outputStream* st) const;
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };

@@ -1325,7 +1333,7 @@
 #endif // CC_INTERP

 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };

@@ -1451,7 +1459,7 @@
   }

 #ifndef PRODUCT
-  virtual void print_data_on(outputStream* st) const;
+  virtual void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };

@@ -1554,7 +1562,7 @@
   void post_initialize(BytecodeStream* stream, MethodData* mdo);

 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };

@@ -1632,7 +1640,7 @@
   void post_initialize(BytecodeStream* stream, MethodData* mdo);

 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };

@@ -1825,7 +1833,7 @@
   void post_initialize(BytecodeStream* stream, MethodData* mdo);

 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };

@@ -1852,7 +1860,7 @@
   }

 #ifndef PRODUCT
-  void print_data_on(outputStream* st) const;
+  void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif
 };

@@ -1913,7 +1921,7 @@
   }

 #ifndef PRODUCT
-  virtual void print_data_on(outputStream* st) const;
+  virtual void print_data_on(outputStream* st, const char* extra = NULL) const;
 #endif

   static ByteSize stack_slot_offset(int i) {
@@ -1925,6 +1933,54 @@
   }
 };

+// SpeculativeTrapData
+//
+// A SpeculativeTrapData is used to record traps due to type
+// speculation. It records the root of the compilation: that type
+// speculation is wrong in the context of one compilation (for
+// method1) doesn't mean it's wrong in the context of another one (for
+// method2). Type speculation could have more/different data in the
+// context of the compilation of method2 and it's worthwhile to try an
+// optimization that failed for compilation of method1 in the context
+// of compilation of method2.
+// Space for SpeculativeTrapData entries is allocated from the extra
+// data space in the MDO. If we run out of space, the trap data for
+// the ProfileData at that bci is updated.
+class SpeculativeTrapData : public ProfileData {
+protected:
+  enum {
+    method_offset,
+    speculative_trap_cell_count
+  };
+public:
+  SpeculativeTrapData(DataLayout* layout) : ProfileData(layout) {
+    assert(layout->tag() == DataLayout::speculative_trap_data_tag, "wrong type");
+  }
+
+  virtual bool is_SpeculativeTrapData() const { return true; }
+
+  static int static_cell_count() {
+    return speculative_trap_cell_count;
+  }
+
+  virtual int cell_count() const {
+    return static_cell_count();
+  }
+
+  // Direct accessor
+  Method* method() const {
+    return (Method*)intptr_at(method_offset);
+  }
+
+  void set_method(Method* m) {
+    set_intptr_at(method_offset, (intptr_t)m);
+  }
+
+#ifndef PRODUCT
+  virtual void print_data_on(outputStream* st, const char* extra = NULL) const;
+#endif
+};
+
 // MethodData*
 //
 // A MethodData* holds information which has been collected about
@@ -1985,16 +2041,18 @@
   // Cached hint for bci_to_dp and bci_to_data
   int _hint_di;

+  Mutex _extra_data_lock;
+
   MethodData(methodHandle method, int size, TRAPS);
 public:
   static MethodData* allocate(ClassLoaderData* loader_data, methodHandle method, TRAPS);
-  MethodData() {}; // For ciMethodData
+  MethodData() : _extra_data_lock(Monitor::leaf, "MDO extra data lock") {}; // For ciMethodData

   bool is_methodData() const volatile { return true; }

   // Whole-method sticky bits and flags
   enum {
-    _trap_hist_limit    = 17,   // decoupled from Deoptimization::Reason_LIMIT
+    _trap_hist_limit    = 19,   // decoupled from Deoptimization::Reason_LIMIT
     _trap_hist_mask     = max_jubyte,
     _extra_data_count   = 4     // extra DataLayout headers, for trap history
   }; // Public flag values
@@ -2025,6 +2083,12 @@
   // Counter values at the time profiling started.
   int               _invocation_counter_start;
   int               _backedge_counter_start;
+
+#if INCLUDE_RTM_OPT
+  // State of RTM code generation during compilation of the method
+  int               _rtm_state;
+#endif
+
   // Number of loops and blocks is computed when compiling the first
   // time with C1. It is used to determine if method is trivial.
   short             _num_loops;
@@ -2049,6 +2113,7 @@
   // Helper for size computation
   static int compute_data_size(BytecodeStream* stream);
   static int bytecode_cell_count(Bytecodes::Code code);
+  static bool is_speculative_trap_bytecode(Bytecodes::Code code);
   enum { no_profile_data = -1, variable_cell_count = -2 };

   // Helper for initialization
@@ -2092,8 +2157,9 @@
   // What is the index of the first data entry?
   int first_di() const { return 0; }

+  ProfileData* bci_to_extra_data_helper(int bci, Method* m, DataLayout*& dp, bool concurrent);
   // Find or create an extra ProfileData:
-  ProfileData* bci_to_extra_data(int bci, bool create_if_missing);
+  ProfileData* bci_to_extra_data(int bci, Method* m, bool create_if_missing);

   // return the argument info cell
   ArgInfoData *arg_info();
@@ -2116,6 +2182,10 @@
   static bool profile_parameters_jsr292_only();
   static bool profile_all_parameters();

+  void clean_extra_data(BoolObjectClosure* is_alive);
+  void clean_extra_data_helper(DataLayout* dp, int shift, bool reset = false);
+  void verify_extra_data_clean(BoolObjectClosure* is_alive);
+
 public:
   static int header_size() {
     return sizeof(MethodData)/wordSize;
@@ -2124,7 +2194,7 @@
   // Compute the size of a MethodData* before it is created.
   static int compute_allocation_size_in_bytes(methodHandle method);
   static int compute_allocation_size_in_words(methodHandle method);
-  static int compute_extra_data_count(int data_size, int empty_bc_count);
+  static int compute_extra_data_count(int data_size, int empty_bc_count, bool needs_speculative_traps);

   // Determine if a given bytecode can have profile information.
   static bool bytecode_has_profile(Bytecodes::Code code) {
@@ -2182,6 +2252,22 @@
   InvocationCounter* invocation_counter()     { return &_invocation_counter; }
   InvocationCounter* backedge_counter()       { return &_backedge_counter;   }

+#if INCLUDE_RTM_OPT
+  int rtm_state() const {
+    return _rtm_state;
+  }
+  void set_rtm_state(RTMState rstate) {
+    _rtm_state = (int)rstate;
+  }
+  void atomic_set_rtm_state(RTMState rstate) {
+    Atomic::store((int)rstate, &_rtm_state);
+  }
+
+  static int rtm_state_offset_in_bytes() {
+    return offset_of(MethodData, _rtm_state);
+  }
+#endif
+
   void set_would_profile(bool p)              { _would_profile = p;    }
   bool would_profile() const                  { return _would_profile; }

@@ -2265,9 +2351,26 @@
   ProfileData* bci_to_data(int bci);

   // Same, but try to create an extra_data record if one is needed:
-  ProfileData* allocate_bci_to_data(int bci) {
-    ProfileData* data = bci_to_data(bci);
-    return (data != NULL) ? data : bci_to_extra_data(bci, true);
+  ProfileData* allocate_bci_to_data(int bci, Method* m) {
+    ProfileData* data = NULL;
+    // If m not NULL, try to allocate a SpeculativeTrapData entry
+    if (m == NULL) {
+      data = bci_to_data(bci);
+    }
+    if (data != NULL) {
+      return data;
+    }
+    data = bci_to_extra_data(bci, m, true);
+    if (data != NULL) {
+      return data;
+    }
+    // If SpeculativeTrapData allocation fails try to allocate a
+    // regular entry
+    data = bci_to_data(bci);
+    if (data != NULL) {
+      return data;
+    }
+    return bci_to_extra_data(bci, NULL, true);
   }

   // Add a handful of extra data records, for trap tracking.
@@ -2275,7 +2378,7 @@
   DataLayout* extra_data_limit() const { return (DataLayout*)((address)this + size_in_bytes()); }
   int extra_data_size() const { return (address)extra_data_limit()
                                - (address)extra_data_base(); }
-  static DataLayout* next_extra(DataLayout* dp) { return (DataLayout*)((address)dp + in_bytes(DataLayout::cell_offset(0))); }
+  static DataLayout* next_extra(DataLayout* dp);

   // Return (uint)-1 for overflow.
   uint trap_count(int reason) const {
@@ -2375,6 +2478,8 @@
   static bool profile_return();
   static bool profile_parameters();
   static bool profile_return_jsr292_only();
+
+  void clean_method_data(BoolObjectClosure* is_alive);
 };

 #endif // SHARE_VM_OOPS_METHODDATAOOP_HPP
--- a/src/share/vm/oops/oop.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/oops/oop.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -102,7 +102,7 @@
 }

 // When String table needs to rehash
-unsigned int oopDesc::new_hash(jint seed) {
+unsigned int oopDesc::new_hash(juint seed) {
   EXCEPTION_MARK;
   ResourceMark rm;
   int length;
--- a/src/share/vm/oops/oop.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/oops/oop.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -362,7 +362,7 @@
   intptr_t slow_identity_hash();

   // Alternate hashing code if string table is rehashed
-  unsigned int new_hash(jint seed);
+  unsigned int new_hash(juint seed);

   // marks are forwarded to stack when object is locked
   bool     has_displaced_mark() const;
--- a/src/share/vm/oops/symbol.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/oops/symbol.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -207,7 +207,7 @@
 }

 // Alternate hashing for unbalanced symbol tables.
-unsigned int Symbol::new_hash(jint seed) {
+unsigned int Symbol::new_hash(juint seed) {
   ResourceMark rm;
   // Use alternate hashing algorithm on this symbol.
   return AltHashing::murmur3_32(seed, (const jbyte*)as_C_string(), utf8_length());
--- a/src/share/vm/oops/symbol.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/oops/symbol.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -154,7 +154,7 @@
   int identity_hash()       { return _identity_hash; }

   // For symbol table alternate hashing
-  unsigned int new_hash(jint seed);
+  unsigned int new_hash(juint seed);

   // Reference counting.  See comments above this class for when to use.
   int refcount() const      { return _refcount; }
--- a/src/share/vm/opto/c2_globals.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/c2_globals.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -449,6 +449,9 @@
   diagnostic(bool, PrintPreciseBiasedLockingStatistics, false,              \
           "Print per-lock-site statistics of biased locking in JVM")        \
                                                                             \
+  diagnostic(bool, PrintPreciseRTMLockingStatistics, false,                 \
+          "Print per-lock-site statistics of rtm locking in JVM")           \
+                                                                            \
   notproduct(bool, PrintEliminateLocks, false,                              \
           "Print out when locks are eliminated")                            \
                                                                             \
@@ -647,15 +650,19 @@
   diagnostic(bool, OptimizeExpensiveOps, true,                              \
           "Find best control for expensive operations")                     \
                                                                             \
-  experimental(bool, UseMathExactIntrinsics, false,                         \
+  product(bool, UseMathExactIntrinsics, true,                               \
           "Enables intrinsification of various java.lang.Math functions")   \
                                                                             \
   experimental(bool, ReplaceInParentMaps, false,                            \
           "Propagate type improvements in callers of inlinee if possible")  \
                                                                             \
-  experimental(bool, UseTypeSpeculation, false,                             \
+  product(bool, UseTypeSpeculation, true,                                   \
           "Speculatively propagate types from profiles")                    \
                                                                             \
+  diagnostic(bool, UseInlineDepthForSpeculativeTypes, true,                 \
+          "Carry inline depth of profile point with speculative type "      \
+          "and give priority to profiling from lower inline depth")         \
+                                                                            \
   product_pd(bool, TrapBasedRangeChecks,                                    \
           "Generate code for range checks that uses a cmp and trap "        \
           "instruction raising SIGTRAP. Used on PPC64.")                    \
--- a/src/share/vm/opto/classes.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/classes.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -29,8 +29,6 @@
 macro(AbsF)
 macro(AbsI)
 macro(AddD)
-macro(AddExactI)
-macro(AddExactL)
 macro(AddF)
 macro(AddI)
 macro(AddL)
@@ -135,7 +133,6 @@
 macro(ExpD)
 macro(FastLock)
 macro(FastUnlock)
-macro(FlagsProj)
 macro(Goto)
 macro(Halt)
 macro(If)
@@ -170,9 +167,6 @@
 macro(LoopLimit)
 macro(Mach)
 macro(MachProj)
-macro(MathExact)
-macro(MathExactI)
-macro(MathExactL)
 macro(MaxI)
 macro(MemBarAcquire)
 macro(LoadFence)
@@ -194,22 +188,25 @@
 macro(MoveL2D)
 macro(MoveD2L)
 macro(MulD)
-macro(MulExactI)
-macro(MulExactL)
 macro(MulF)
 macro(MulHiL)
 macro(MulI)
 macro(MulL)
 macro(Multi)
 macro(NegD)
-macro(NegExactI)
-macro(NegExactL)
 macro(NegF)
 macro(NeverBranch)
 macro(Opaque1)
 macro(Opaque2)
+macro(Opaque3)
 macro(OrI)
 macro(OrL)
+macro(OverflowAddI)
+macro(OverflowSubI)
+macro(OverflowMulI)
+macro(OverflowAddL)
+macro(OverflowSubL)
+macro(OverflowMulL)
 macro(PCTable)
 macro(Parm)
 macro(PartialSubtypeCheck)
@@ -253,8 +250,6 @@
 macro(StrEquals)
 macro(StrIndexOf)
 macro(SubD)
-macro(SubExactI)
-macro(SubExactL)
 macro(SubF)
 macro(SubI)
 macro(SubL)
--- a/src/share/vm/opto/compile.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/compile.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -694,9 +694,10 @@
   set_print_inlining(PrintInlining || method()->has_option("PrintInlining") NOT_PRODUCT( || PrintOptoInlining));
   set_print_intrinsics(PrintIntrinsics || method()->has_option("PrintIntrinsics"));

-  if (ProfileTraps) {
+  if (ProfileTraps RTM_OPT_ONLY( || UseRTMLocking )) {
     // Make sure the method being compiled gets its own MDO,
     // so we can at least track the decompile_count().
+    // Need MDO to record RTM code generation state.
     method()->ensure_method_data();
   }

@@ -907,7 +908,8 @@
                            compiler,
                            env()->comp_level(),
                            has_unsafe_access(),
-                           SharedRuntime::is_wide_vector(max_vector_size())
+                           SharedRuntime::is_wide_vector(max_vector_size()),
+                           rtm_state()
                            );

     if (log() != NULL) // Print code cache state into compiler log
@@ -1073,7 +1075,23 @@
   set_do_scheduling(OptoScheduling);
   set_do_count_invocations(false);
   set_do_method_data_update(false);
-
+  set_rtm_state(NoRTM); // No RTM lock eliding by default
+#if INCLUDE_RTM_OPT
+  if (UseRTMLocking && has_method() && (method()->method_data_or_null() != NULL)) {
+    int rtm_state = method()->method_data()->rtm_state();
+    if (method_has_option("NoRTMLockEliding") || ((rtm_state & NoRTM) != 0)) {
+      // Don't generate RTM lock eliding code.
+      set_rtm_state(NoRTM);
+    } else if (method_has_option("UseRTMLockEliding") || ((rtm_state & UseRTM) != 0) || !UseRTMDeopt) {
+      // Generate RTM lock eliding code without abort ratio calculation code.
+      set_rtm_state(UseRTM);
+    } else if (UseRTMDeopt) {
+      // Generate RTM lock eliding code and include abort ratio calculation
+      // code if UseRTMDeopt is on.
+      set_rtm_state(ProfileRTM);
+    }
+  }
+#endif
   if (debug_info()->recording_non_safepoints()) {
     set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*>
                         (comp_arena(), 8, 0, NULL));
@@ -2581,6 +2599,7 @@
     break;
   case Op_Opaque1:              // Remove Opaque Nodes before matching
   case Op_Opaque2:              // Remove Opaque Nodes before matching
+  case Op_Opaque3:
     n->subsume_by(n->in(1), this);
     break;
   case Op_CallStaticJava:
@@ -3028,42 +3047,6 @@
       n->set_req(MemBarNode::Precedent, top());
     }
     break;
-    // Must set a control edge on all nodes that produce a FlagsProj
-    // so they can't escape the block that consumes the flags.
-    // Must also set the non throwing branch as the control
-    // for all nodes that depends on the result. Unless the node
-    // already have a control that isn't the control of the
-    // flag producer
-  case Op_FlagsProj:
-    {
-      MathExactNode* math = (MathExactNode*)  n->in(0);
-      Node* ctrl = math->control_node();
-      Node* non_throwing = math->non_throwing_branch();
-      math->set_req(0, ctrl);
-
-      Node* result = math->result_node();
-      if (result != NULL) {
-        for (DUIterator_Fast jmax, j = result->fast_outs(jmax); j < jmax; j++) {
-          Node* out = result->fast_out(j);
-          // Phi nodes shouldn't be moved. They would only match below if they
-          // had the same control as the MathExactNode. The only time that
-          // would happen is if the Phi is also an input to the MathExact
-          //
-          // Cmp nodes shouldn't have control set at all.
-          if (out->is_Phi() ||
-              out->is_Cmp()) {
-            continue;
-          }
-
-          if (out->in(0) == NULL) {
-            out->set_req(0, non_throwing);
-          } else if (out->in(0) == ctrl) {
-            out->set_req(0, non_throwing);
-          }
-        }
-      }
-    }
-    break;
   default:
     assert( !n->is_Call(), "" );
     assert( !n->is_Mem(), "" );
@@ -3285,7 +3268,8 @@
     // because of a transient condition during start-up in the interpreter.
     return false;
   }
-  if (md->has_trap_at(bci, reason) != 0) {
+  ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL;
+  if (md->has_trap_at(bci, m, reason) != 0) {
     // Assume PerBytecodeTrapLimit==0, for a more conservative heuristic.
     // Also, if there are multiple reasons, or if there is no per-BCI record,
     // assume the worst.
@@ -3303,7 +3287,7 @@
 // Less-accurate variant which does not require a method and bci.
 bool Compile::too_many_traps(Deoptimization::DeoptReason reason,
                              ciMethodData* logmd) {
- if (trap_count(reason) >= (uint)PerMethodTrapLimit) {
+  if (trap_count(reason) >= Deoptimization::per_method_trap_limit(reason)) {
     // Too many traps globally.
     // Note that we use cumulative trap_count, not just md->trap_count.
     if (log()) {
@@ -3338,10 +3322,11 @@
   uint m_cutoff  = (uint) PerMethodRecompilationCutoff / 2 + 1;  // not zero
   Deoptimization::DeoptReason per_bc_reason
     = Deoptimization::reason_recorded_per_bytecode_if_any(reason);
+  ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL;
   if ((per_bc_reason == Deoptimization::Reason_none
-       || md->has_trap_at(bci, reason) != 0)
+       || md->has_trap_at(bci, m, reason) != 0)
       // The trap frequency measure we care about is the recompile count:
-      && md->trap_recompiled_at(bci)
+      && md->trap_recompiled_at(bci, m)
       && md->overflow_recompile_count() >= bc_cutoff) {
     // Do not emit a trap here if it has already caused recompilations.
     // Also, if there are multiple reasons, or if there is no per-BCI record,
--- a/src/share/vm/opto/compile.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/compile.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -319,9 +319,9 @@
   bool                  _trace_opto_output;
   bool                  _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing
 #endif
-
   // JSR 292
   bool                  _has_method_handle_invokes; // True if this method has MethodHandle invokes.
+  RTMState              _rtm_state;             // State of Restricted Transactional Memory usage

   // Compilation environment.
   Arena                 _comp_arena;            // Arena with lifetime equivalent to Compile
@@ -591,6 +591,10 @@
   void          set_print_inlining(bool z)       { _print_inlining = z; }
   bool              print_intrinsics() const     { return _print_intrinsics; }
   void          set_print_intrinsics(bool z)     { _print_intrinsics = z; }
+  RTMState          rtm_state()  const           { return _rtm_state; }
+  void          set_rtm_state(RTMState s)        { _rtm_state = s; }
+  bool              use_rtm() const              { return (_rtm_state & NoRTM) == 0; }
+  bool          profile_rtm() const              { return _rtm_state == ProfileRTM; }
   // check the CompilerOracle for special behaviours for this compile
   bool          method_has_option(const char * option) {
     return method() != NULL && method()->has_option(option);
--- a/src/share/vm/opto/connode.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/connode.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -642,6 +642,19 @@
   virtual const Type *bottom_type() const { return TypeInt::INT; }
 };

+//------------------------------Opaque3Node------------------------------------
+// A node to prevent unwanted optimizations. Will be optimized only during
+// macro nodes expansion.
+class Opaque3Node : public Opaque2Node {
+  int _opt; // what optimization it was used for
+public:
+  enum { RTM_OPT };
+  Opaque3Node(Compile* C, Node *n, int opt) : Opaque2Node(C, n), _opt(opt) {}
+  virtual int Opcode() const;
+  bool rtm_opt() const { return (_opt == RTM_OPT); }
+};
+
+
 //----------------------PartialSubtypeCheckNode--------------------------------
 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
 // array for an instance of the superklass.  Set a hidden internal cache on a
--- a/src/share/vm/opto/doCall.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/doCall.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -250,7 +250,7 @@
           CallGenerator* miss_cg;
           Deoptimization::DeoptReason reason = morphism == 2 ?
                                     Deoptimization::Reason_bimorphic :
-                                    Deoptimization::Reason_class_check;
+                                    (speculative_receiver_type == NULL ? Deoptimization::Reason_class_check : Deoptimization::Reason_speculate_class_check);
           if ((morphism == 1 || (morphism == 2 && next_hit_cg != NULL)) &&
               !too_many_traps(jvms->method(), jvms->bci(), reason)
              ) {
--- a/src/share/vm/opto/graphKit.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/graphKit.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -612,9 +612,10 @@
   // Usual case:  Bail to interpreter.
   // Reserve the right to recompile if we haven't seen anything yet.

+  assert(!Deoptimization::reason_is_speculate(reason), "unsupported");
   Deoptimization::DeoptAction action = Deoptimization::Action_maybe_recompile;
   if (treat_throw_as_hot
-      && (method()->method_data()->trap_recompiled_at(bci())
+      && (method()->method_data()->trap_recompiled_at(bci(), NULL)
           || C->too_many_traps(reason))) {
     // We cannot afford to take more traps here.  Suffer in the interpreter.
     if (C->log() != NULL)
@@ -1124,6 +1125,17 @@
   }
   return _gvn.transform( new (C) ConvI2LNode(offset));
 }
+
+Node* GraphKit::ConvI2UL(Node* offset) {
+  juint offset_con = (juint) find_int_con(offset, Type::OffsetBot);
+  if (offset_con != (juint) Type::OffsetBot) {
+    return longcon((julong) offset_con);
+  }
+  Node* conv = _gvn.transform( new (C) ConvI2LNode(offset));
+  Node* mask = _gvn.transform( ConLNode::make(C, (julong) max_juint) );
+  return _gvn.transform( new (C) AndLNode(conv, mask) );
+}
+
 Node* GraphKit::ConvL2I(Node* offset) {
   // short-circuit a common case
   jlong offset_con = find_long_con(offset, (jlong)Type::OffsetBot);
@@ -2112,30 +2124,33 @@
  * @return           node with improved type
  */
 Node* GraphKit::record_profile_for_speculation(Node* n, ciKlass* exact_kls) {
-  const TypeOopPtr* current_type = _gvn.type(n)->isa_oopptr();
+  const Type* current_type = _gvn.type(n);
   assert(UseTypeSpeculation, "type speculation must be on");
-  if (exact_kls != NULL &&
-      // nothing to improve if type is already exact
-      (current_type == NULL ||
-       (!current_type->klass_is_exact() &&
-        (current_type->speculative() == NULL ||
-         !current_type->speculative()->klass_is_exact())))) {
+
+  const TypeOopPtr* speculative = current_type->speculative();
+
+  if (current_type->would_improve_type(exact_kls, jvms()->depth())) {
     const TypeKlassPtr* tklass = TypeKlassPtr::make(exact_kls);
     const TypeOopPtr* xtype = tklass->as_instance_type();
     assert(xtype->klass_is_exact(), "Should be exact");
-
+    // record the new speculative type's depth
+    speculative = xtype->with_inline_depth(jvms()->depth());
+  }
+
+  if (speculative != current_type->speculative()) {
     // Build a type with a speculative type (what we think we know
     // about the type but will need a guard when we use it)
-    const TypeOopPtr* spec_type = TypeOopPtr::make(TypePtr::BotPTR, Type::OffsetBot, TypeOopPtr::InstanceBot, xtype);
-    // We're changing the type, we need a new cast node to carry the
-    // new type. The new type depends on the control: what profiling
-    // tells us is only valid from here as far as we can tell.
-    Node* cast = new(C) CastPPNode(n, spec_type);
-    cast->init_req(0, control());
+    const TypeOopPtr* spec_type = TypeOopPtr::make(TypePtr::BotPTR, Type::OffsetBot, TypeOopPtr::InstanceBot, speculative);
+    // We're changing the type, we need a new CheckCast node to carry
+    // the new type. The new type depends on the control: what
+    // profiling tells us is only valid from here as far as we can
+    // tell.
+    Node* cast = new(C) CheckCastPPNode(control(), n, current_type->remove_speculative()->join_speculative(spec_type));
     cast = _gvn.transform(cast);
     replace_in_map(n, cast);
     n = cast;
   }
+
   return n;
 }

@@ -2145,7 +2160,7 @@
  *
  * @param n  receiver node
  *
- * @return           node with improved type
+ * @return   node with improved type
  */
 Node* GraphKit::record_profiled_receiver_for_speculation(Node* n) {
   if (!UseTypeSpeculation) {
@@ -2739,12 +2754,14 @@
 // Subsequent type checks will always fold up.
 Node* GraphKit::maybe_cast_profiled_receiver(Node* not_null_obj,
                                              ciKlass* require_klass,
-                                            ciKlass* spec_klass,
+                                             ciKlass* spec_klass,
                                              bool safe_for_replace) {
   if (!UseTypeProfile || !TypeProfileCasts) return NULL;

+  Deoptimization::DeoptReason reason = spec_klass == NULL ? Deoptimization::Reason_class_check : Deoptimization::Reason_speculate_class_check;
+
   // Make sure we haven't already deoptimized from this tactic.
-  if (too_many_traps(Deoptimization::Reason_class_check))
+  if (too_many_traps(reason))
     return NULL;

   // (No, this isn't a call, but it's enough like a virtual call
@@ -2766,7 +2783,7 @@
                                             &exact_obj);
       { PreserveJVMState pjvms(this);
         set_control(slow_ctl);
-        uncommon_trap(Deoptimization::Reason_class_check,
+        uncommon_trap(reason,
                       Deoptimization::Action_maybe_recompile);
       }
       if (safe_for_replace) {
@@ -2793,8 +2810,10 @@
                                         bool not_null) {
   // type == NULL if profiling tells us this object is always null
   if (type != NULL) {
-    if (!too_many_traps(Deoptimization::Reason_null_check) &&
-        !too_many_traps(Deoptimization::Reason_class_check)) {
+    Deoptimization::DeoptReason class_reason = Deoptimization::Reason_speculate_class_check;
+    Deoptimization::DeoptReason null_reason = Deoptimization::Reason_null_check;
+    if (!too_many_traps(null_reason) &&
+        !too_many_traps(class_reason)) {
       Node* not_null_obj = NULL;
       // not_null is true if we know the object is not null and
       // there's no need for a null check
@@ -2813,7 +2832,7 @@
       {
         PreserveJVMState pjvms(this);
         set_control(slow_ctl);
-        uncommon_trap(Deoptimization::Reason_class_check,
+        uncommon_trap(class_reason,
                       Deoptimization::Action_maybe_recompile);
       }
       replace_in_map(not_null_obj, exact_obj);
@@ -2882,7 +2901,7 @@
   }

   if (known_statically && UseTypeSpeculation) {
-    // If we know the type check always succeed then we don't use the
+    // If we know the type check always succeeds then we don't use the
     // profiling data at this bytecode. Don't lose it, feed it to the
     // type system as a speculative type.
     not_null_obj = record_profiled_receiver_for_speculation(not_null_obj);
@@ -2999,22 +3018,28 @@
   }

   Node* cast_obj = NULL;
-  const TypeOopPtr* obj_type = _gvn.type(obj)->is_oopptr();
-  // We may not have profiling here or it may not help us. If we have
-  // a speculative type use it to perform an exact cast.
-  ciKlass* spec_obj_type = obj_type->speculative_type();
-  if (spec_obj_type != NULL ||
-      (data != NULL &&
-       // Counter has never been decremented (due to cast failure).
-       // ...This is a reasonable thing to expect.  It is true of
-       // all casts inserted by javac to implement generic types.
-       data->as_CounterData()->count() >= 0)) {
-    cast_obj = maybe_cast_profiled_receiver(not_null_obj, tk->klass(), spec_obj_type, safe_for_replace);
-    if (cast_obj != NULL) {
-      if (failure_control != NULL) // failure is now impossible
-        (*failure_control) = top();
-      // adjust the type of the phi to the exact klass:
-      phi->raise_bottom_type(_gvn.type(cast_obj)->meet_speculative(TypePtr::NULL_PTR));
+  if (tk->klass_is_exact()) {
+    // The following optimization tries to statically cast the speculative type of the object
+    // (for example obtained during profiling) to the type of the superklass and then do a
+    // dynamic check that the type of the object is what we expect. To work correctly
+    // for checkcast and aastore the type of superklass should be exact.
+    const TypeOopPtr* obj_type = _gvn.type(obj)->is_oopptr();
+    // We may not have profiling here or it may not help us. If we have
+    // a speculative type use it to perform an exact cast.
+    ciKlass* spec_obj_type = obj_type->speculative_type();
+    if (spec_obj_type != NULL ||
+        (data != NULL &&
+         // Counter has never been decremented (due to cast failure).
+         // ...This is a reasonable thing to expect.  It is true of
+         // all casts inserted by javac to implement generic types.
+         data->as_CounterData()->count() >= 0)) {
+      cast_obj = maybe_cast_profiled_receiver(not_null_obj, tk->klass(), spec_obj_type, safe_for_replace);
+      if (cast_obj != NULL) {
+        if (failure_control != NULL) // failure is now impossible
+          (*failure_control) = top();
+        // adjust the type of the phi to the exact klass:
+        phi->raise_bottom_type(_gvn.type(cast_obj)->meet_speculative(TypePtr::NULL_PTR));
+      }
     }
   }

@@ -3137,10 +3162,14 @@
   Node* mem = reset_memory();

   FastLockNode * flock = _gvn.transform(new (C) FastLockNode(0, obj, box) )->as_FastLock();
-  if (PrintPreciseBiasedLockingStatistics) {
+  if (UseBiasedLocking && PrintPreciseBiasedLockingStatistics) {
     // Create the counters for this fast lock.
     flock->create_lock_counter(sync_jvms()); // sync_jvms used to get current bci
   }
+
+  // Create the rtm counters for this fast lock if needed.
+  flock->create_rtm_lock_counter(sync_jvms()); // sync_jvms used to get current bci
+
   // Add monitor to debug info for the slow path.  If we block inside the
   // slow path and de-opt, we need the monitor hanging around
   map()->push_monitor( flock );
--- a/src/share/vm/opto/graphKit.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/graphKit.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -338,6 +338,7 @@
   // Convert between int and long, and size_t.
   // (See macros ConvI2X, etc., in type.hpp for ConvI2X, etc.)
   Node* ConvI2L(Node* offset);
+  Node* ConvI2UL(Node* offset);
   Node* ConvL2I(Node* offset);
   // Find out the klass of an object.
   Node* load_object_klass(Node* object);
@@ -406,7 +407,7 @@
   // Use the type profile to narrow an object type.
   Node* maybe_cast_profiled_receiver(Node* not_null_obj,
                                      ciKlass* require_klass,
-                                    ciKlass* spec,
+                                     ciKlass* spec,
                                      bool safe_for_replace);

   // Cast obj to type and emit guard unless we had too many traps here already
--- a/src/share/vm/opto/ifnode.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/ifnode.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -76,7 +76,6 @@
   if( !i1->is_Bool() ) return NULL;
   BoolNode *b = i1->as_Bool();
   Node *cmp = b->in(1);
-  if( cmp->is_FlagsProj() ) return NULL;
   if( !cmp->is_Cmp() ) return NULL;
   i1 = cmp->in(1);
   if( i1 == NULL || !i1->is_Phi() ) return NULL;
--- a/src/share/vm/opto/lcm.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/lcm.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -520,13 +520,6 @@
           break;
         }

-        // For nodes that produce a FlagsProj, make the node adjacent to the
-        // use of the FlagsProj
-        if (use->is_FlagsProj() && get_block_for_node(use) == block) {
-          found_machif = true;
-          break;
-        }
-
         // More than this instruction pending for successor to be ready,
         // don't choose this if other opportunities are ready
         if (ready_cnt.at(use->_idx) > 1)
--- a/src/share/vm/opto/library_call.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/library_call.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -203,7 +203,9 @@
   bool inline_math_native(vmIntrinsics::ID id);
   bool inline_trig(vmIntrinsics::ID id);
   bool inline_math(vmIntrinsics::ID id);
-  void inline_math_mathExact(Node* math);
+  template <typename OverflowOp>
+  bool inline_math_overflow(Node* arg1, Node* arg2);
+  void inline_math_mathExact(Node* math, Node* test);
   bool inline_math_addExactI(bool is_increment);
   bool inline_math_addExactL(bool is_increment);
   bool inline_math_multiplyExactI();
@@ -517,31 +519,31 @@

   case vmIntrinsics::_incrementExactI:
   case vmIntrinsics::_addExactI:
-    if (!Matcher::match_rule_supported(Op_AddExactI) || !UseMathExactIntrinsics) return NULL;
+    if (!Matcher::match_rule_supported(Op_OverflowAddI) || !UseMathExactIntrinsics) return NULL;
     break;
   case vmIntrinsics::_incrementExactL:
   case vmIntrinsics::_addExactL:
-    if (!Matcher::match_rule_supported(Op_AddExactL) || !UseMathExactIntrinsics) return NULL;
+    if (!Matcher::match_rule_supported(Op_OverflowAddL) || !UseMathExactIntrinsics) return NULL;
     break;
   case vmIntrinsics::_decrementExactI:
   case vmIntrinsics::_subtractExactI:
-    if (!Matcher::match_rule_supported(Op_SubExactI) || !UseMathExactIntrinsics) return NULL;
+    if (!Matcher::match_rule_supported(Op_OverflowSubI) || !UseMathExactIntrinsics) return NULL;
     break;
   case vmIntrinsics::_decrementExactL:
   case vmIntrinsics::_subtractExactL:
-    if (!Matcher::match_rule_supported(Op_SubExactL) || !UseMathExactIntrinsics) return NULL;
+    if (!Matcher::match_rule_supported(Op_OverflowSubL) || !UseMathExactIntrinsics) return NULL;
     break;
   case vmIntrinsics::_negateExactI:
-    if (!Matcher::match_rule_supported(Op_NegExactI) || !UseMathExactIntrinsics) return NULL;
+    if (!Matcher::match_rule_supported(Op_OverflowSubI) || !UseMathExactIntrinsics) return NULL;
     break;
   case vmIntrinsics::_negateExactL:
-    if (!Matcher::match_rule_supported(Op_NegExactL) || !UseMathExactIntrinsics) return NULL;
+    if (!Matcher::match_rule_supported(Op_OverflowSubL) || !UseMathExactIntrinsics) return NULL;
     break;
   case vmIntrinsics::_multiplyExactI:
-    if (!Matcher::match_rule_supported(Op_MulExactI) || !UseMathExactIntrinsics) return NULL;
+    if (!Matcher::match_rule_supported(Op_OverflowMulI) || !UseMathExactIntrinsics) return NULL;
     break;
   case vmIntrinsics::_multiplyExactL:
-    if (!Matcher::match_rule_supported(Op_MulExactL) || !UseMathExactIntrinsics) return NULL;
+    if (!Matcher::match_rule_supported(Op_OverflowMulL) || !UseMathExactIntrinsics) return NULL;
     break;

  default:
@@ -1970,18 +1972,8 @@
   return true;
 }

-void LibraryCallKit::inline_math_mathExact(Node* math) {
-  // If we didn't get the expected opcode it means we have optimized
-  // the node to something else and don't need the exception edge.
-  if (!math->is_MathExact()) {
-    set_result(math);
-    return;
-  }
-
-  Node* result = _gvn.transform( new(C) ProjNode(math, MathExactNode::result_proj_node));
-  Node* flags = _gvn.transform( new(C) FlagsProjNode(math, MathExactNode::flags_proj_node));
-
-  Node* bol = _gvn.transform( new (C) BoolNode(flags, BoolTest::overflow) );
+void LibraryCallKit::inline_math_mathExact(Node* math, Node *test) {
+  Node* bol = _gvn.transform( new (C) BoolNode(test, BoolTest::overflow) );
   IfNode* check = create_and_map_if(control(), bol, PROB_UNLIKELY_MAG(3), COUNT_UNKNOWN);
   Node* fast_path = _gvn.transform( new (C) IfFalseNode(check));
   Node* slow_path = _gvn.transform( new (C) IfTrueNode(check) );
@@ -1999,108 +1991,50 @@
   }

   set_control(fast_path);
-  set_result(result);
+  set_result(math);
+}
+
+template <typename OverflowOp>
+bool LibraryCallKit::inline_math_overflow(Node* arg1, Node* arg2) {
+  typedef typename OverflowOp::MathOp MathOp;
+
+  MathOp* mathOp = new(C) MathOp(arg1, arg2);
+  Node* operation = _gvn.transform( mathOp );
+  Node* ofcheck = _gvn.transform( new(C) OverflowOp(arg1, arg2) );
+  inline_math_mathExact(operation, ofcheck);
+  return true;
 }

 bool LibraryCallKit::inline_math_addExactI(bool is_increment) {
-  Node* arg1 = argument(0);
-  Node* arg2 = NULL;
-
-  if (is_increment) {
-    arg2 = intcon(1);
-  } else {
-    arg2 = argument(1);
-  }
-
-  Node* add = _gvn.transform( new(C) AddExactINode(NULL, arg1, arg2) );
-  inline_math_mathExact(add);
-  return true;
+  return inline_math_overflow<OverflowAddINode>(argument(0), is_increment ? intcon(1) : argument(1));
 }

 bool LibraryCallKit::inline_math_addExactL(bool is_increment) {
-  Node* arg1 = argument(0); // type long
-  // argument(1) == TOP
-  Node* arg2 = NULL;
-
-  if (is_increment) {
-    arg2 = longcon(1);
-  } else {
-    arg2 = argument(2); // type long
-    // argument(3) == TOP
-  }
-
-  Node* add = _gvn.transform(new(C) AddExactLNode(NULL, arg1, arg2));
-  inline_math_mathExact(add);
-  return true;
+  return inline_math_overflow<OverflowAddLNode>(argument(0), is_increment ? longcon(1) : argument(2));
 }

 bool LibraryCallKit::inline_math_subtractExactI(bool is_decrement) {
-  Node* arg1 = argument(0);
-  Node* arg2 = NULL;
-
-  if (is_decrement) {
-    arg2 = intcon(1);
-  } else {
-    arg2 = argument(1);
-  }
-
-  Node* sub = _gvn.transform(new(C) SubExactINode(NULL, arg1, arg2));
-  inline_math_mathExact(sub);
-  return true;
+  return inline_math_overflow<OverflowSubINode>(argument(0), is_decrement ? intcon(1) : argument(1));
 }

 bool LibraryCallKit::inline_math_subtractExactL(bool is_decrement) {
-  Node* arg1 = argument(0); // type long
-  // argument(1) == TOP
-  Node* arg2 = NULL;
-
-  if (is_decrement) {
-    arg2 = longcon(1);
-  } else {
-    arg2 = argument(2); // type long
-    // argument(3) == TOP
-  }
-
-  Node* sub = _gvn.transform(new(C) SubExactLNode(NULL, arg1, arg2));
-  inline_math_mathExact(sub);
-  return true;
+  return inline_math_overflow<OverflowSubLNode>(argument(0), is_decrement ? longcon(1) : argument(2));
 }

 bool LibraryCallKit::inline_math_negateExactI() {
-  Node* arg1 = argument(0);
-
-  Node* neg = _gvn.transform(new(C) NegExactINode(NULL, arg1));
-  inline_math_mathExact(neg);
-  return true;
+  return inline_math_overflow<OverflowSubINode>(intcon(0), argument(0));
 }

 bool LibraryCallKit::inline_math_negateExactL() {
-  Node* arg1 = argument(0);
-  // argument(1) == TOP
-
-  Node* neg = _gvn.transform(new(C) NegExactLNode(NULL, arg1));
-  inline_math_mathExact(neg);
-  return true;
+  return inline_math_overflow<OverflowSubLNode>(longcon(0), argument(0));
 }

 bool LibraryCallKit::inline_math_multiplyExactI() {
-  Node* arg1 = argument(0);
-  Node* arg2 = argument(1);
-
-  Node* mul = _gvn.transform(new(C) MulExactINode(NULL, arg1, arg2));
-  inline_math_mathExact(mul);
-  return true;
+  return inline_math_overflow<OverflowMulINode>(argument(0), argument(1));
 }

 bool LibraryCallKit::inline_math_multiplyExactL() {
-  Node* arg1 = argument(0);
-  // argument(1) == TOP
-  Node* arg2 = argument(2);
-  // argument(3) == TOP
-
-  Node* mul = _gvn.transform(new(C) MulExactLNode(NULL, arg1, arg2));
-  inline_math_mathExact(mul);
-  return true;
+  return inline_math_overflow<OverflowMulLNode>(argument(0), argument(2));
 }

 Node*
@@ -2666,7 +2600,7 @@
     case T_ADDRESS:
       // Cast to an int type.
       p = _gvn.transform(new (C) CastP2XNode(NULL, p));
-      p = ConvX2L(p);
+      p = ConvX2UL(p);
       break;
     default:
       fatal(err_msg_res("unexpected type %d: %s", type, type2name(type)));
@@ -3246,7 +3180,8 @@
 // private native boolean java.lang.Thread.isInterrupted(boolean ClearInterrupted);
 bool LibraryCallKit::inline_native_isInterrupted() {
   // Add a fast path to t.isInterrupted(clear_int):
-  //   (t == Thread.current() && (!TLS._osthread._interrupted || !clear_int))
+  //   (t == Thread.current() &&
+  //    (!TLS._osthread._interrupted || WINDOWS_ONLY(false) NOT_WINDOWS(!clear_int)))
   //   ? TLS._osthread._interrupted : /*slow path:*/ t.isInterrupted(clear_int)
   // So, in the common case that the interrupt bit is false,
   // we avoid making a call into the VM.  Even if the interrupt bit
@@ -3303,6 +3238,7 @@
   // drop through to next case
   set_control( _gvn.transform(new (C) IfTrueNode(iff_bit)));

+#ifndef TARGET_OS_FAMILY_windows
   // (c) Or, if interrupt bit is set and clear_int is false, use 2nd fast path.
   Node* clr_arg = argument(1);
   Node* cmp_arg = _gvn.transform(new (C) CmpINode(clr_arg, intcon(0)));
@@ -3316,6 +3252,10 @@

   // drop through to next case
   set_control( _gvn.transform(new (C) IfTrueNode(iff_arg)));
+#else
+  // To return true on Windows you must read the _interrupted field
+  // and check the the event state i.e. take the slow path.
+#endif // TARGET_OS_FAMILY_windows

   // (d) Otherwise, go to the slow path.
   slow_region->add_req(control());
--- a/src/share/vm/opto/locknode.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/locknode.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -136,6 +136,8 @@
 //-----------------------------hash--------------------------------------------
 uint FastLockNode::hash() const { return NO_HASH; }

+uint FastLockNode::size_of() const { return sizeof(*this); }
+
 //------------------------------cmp--------------------------------------------
 uint FastLockNode::cmp( const Node &n ) const {
   return (&n == this);                // Always fail except on self
@@ -159,6 +161,22 @@
   _counters = blnc->counters();
 }

+void FastLockNode::create_rtm_lock_counter(JVMState* state) {
+#if INCLUDE_RTM_OPT
+  Compile* C = Compile::current();
+  if (C->profile_rtm() || (PrintPreciseRTMLockingStatistics && C->use_rtm())) {
+    RTMLockingNamedCounter* rlnc = (RTMLockingNamedCounter*)
+           OptoRuntime::new_named_counter(state, NamedCounter::RTMLockingCounter);
+    _rtm_counters = rlnc->counters();
+    if (UseRTMForStackLocks) {
+      rlnc = (RTMLockingNamedCounter*)
+           OptoRuntime::new_named_counter(state, NamedCounter::RTMLockingCounter);
+      _stack_rtm_counters = rlnc->counters();
+    }
+  }
+#endif
+}
+
 //=============================================================================
 //------------------------------do_monitor_enter-------------------------------
 void Parse::do_monitor_enter() {
--- a/src/share/vm/opto/locknode.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/locknode.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -92,13 +92,17 @@
 //------------------------------FastLockNode-----------------------------------
 class FastLockNode: public CmpNode {
 private:
-  BiasedLockingCounters* _counters;
+  BiasedLockingCounters*        _counters;
+  RTMLockingCounters*       _rtm_counters; // RTM lock counters for inflated locks
+  RTMLockingCounters* _stack_rtm_counters; // RTM lock counters for stack locks

 public:
   FastLockNode(Node *ctrl, Node *oop, Node *box) : CmpNode(oop,box) {
     init_req(0,ctrl);
     init_class_id(Class_FastLock);
     _counters = NULL;
+    _rtm_counters = NULL;
+    _stack_rtm_counters = NULL;
   }
   Node* obj_node() const { return in(1); }
   Node* box_node() const { return in(2); }
@@ -107,13 +111,17 @@
   // FastLock and FastUnlockNode do not hash, we need one for each correspoding
   // LockNode/UnLockNode to avoid creating Phi's.
   virtual uint hash() const ;                  // { return NO_HASH; }
+  virtual uint size_of() const;
   virtual uint cmp( const Node &n ) const ;    // Always fail, except on self
   virtual int Opcode() const;
   virtual const Type *Value( PhaseTransform *phase ) const { return TypeInt::CC; }
   const Type *sub(const Type *t1, const Type *t2) const { return TypeInt::CC;}

   void create_lock_counter(JVMState* s);
-  BiasedLockingCounters* counters() const { return _counters; }
+  void create_rtm_lock_counter(JVMState* state);
+  BiasedLockingCounters*        counters() const { return _counters; }
+  RTMLockingCounters*       rtm_counters() const { return _rtm_counters; }
+  RTMLockingCounters* stack_rtm_counters() const { return _stack_rtm_counters; }
 };
--- a/src/share/vm/opto/loopTransform.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/loopTransform.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -617,6 +617,15 @@
       case Op_AryEq: {
         return false;
       }
+#if INCLUDE_RTM_OPT
+      case Op_FastLock:
+      case Op_FastUnlock: {
+        // Don't unroll RTM locking code because it is large.
+        if (UseRTMLocking) {
+          return false;
+        }
+      }
+#endif
     } // switch
   }

@@ -713,10 +722,6 @@
       case Op_ModL: body_size += 30; break;
       case Op_DivL: body_size += 30; break;
       case Op_MulL: body_size += 10; break;
-      case Op_FlagsProj:
-        // Can't handle unrolling of loops containing
-        // nodes that generate a FlagsProj at the moment
-        return false;
       case Op_StrComp:
       case Op_StrEquals:
       case Op_StrIndexOf:
@@ -726,6 +731,15 @@
         // String intrinsics are large and have loops.
         return false;
       }
+#if INCLUDE_RTM_OPT
+      case Op_FastLock:
+      case Op_FastUnlock: {
+        // Don't unroll RTM locking code because it is large.
+        if (UseRTMLocking) {
+          return false;
+        }
+      }
+#endif
     } // switch
   }

@@ -780,10 +794,6 @@
         continue; // not RC

       Node *cmp = bol->in(1);
-      if (cmp->is_FlagsProj()) {
-        continue;
-      }
-
       Node *rc_exp = cmp->in(1);
       Node *limit = cmp->in(2);
--- a/src/share/vm/opto/loopopts.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/loopopts.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -43,12 +43,6 @@
     return NULL;
   }

-  if (n->is_MathExact()) {
-    // MathExact has projections that are not correctly handled in the code
-    // below.
-    return NULL;
-  }
-
   int wins = 0;
   assert(!n->is_CFG(), "");
   assert(region->is_Region(), "");
@@ -2362,8 +2356,7 @@
         opc == Op_Catch     ||
         opc == Op_CatchProj ||
         opc == Op_Jump      ||
-        opc == Op_JumpProj  ||
-        opc == Op_FlagsProj) {
+        opc == Op_JumpProj) {
 #if !defined(PRODUCT)
       if (TracePartialPeeling) {
         tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
--- a/src/share/vm/opto/machnode.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/machnode.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -53,6 +53,7 @@
 class Matcher;
 class PhaseRegAlloc;
 class RegMask;
+class RTMLockingCounters;
 class State;

 //---------------------------MachOper------------------------------------------
@@ -659,8 +660,9 @@
 class MachFastLockNode : public MachNode {
   virtual uint size_of() const { return sizeof(*this); } // Size is bigger
 public:
-  BiasedLockingCounters* _counters;
-
+  BiasedLockingCounters*        _counters;
+  RTMLockingCounters*       _rtm_counters; // RTM lock counters for inflated locks
+  RTMLockingCounters* _stack_rtm_counters; // RTM lock counters for stack locks
   MachFastLockNode() : MachNode() {}
 };
--- a/src/share/vm/opto/macro.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/macro.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -2439,6 +2439,7 @@
     }
   }
   // Next, attempt to eliminate allocations
+  _has_locks = false;
   progress = true;
   while (progress) {
     progress = false;
@@ -2457,11 +2458,13 @@
       case Node::Class_Lock:
       case Node::Class_Unlock:
         assert(!n->as_AbstractLock()->is_eliminated(), "sanity");
+        _has_locks = true;
         break;
       default:
         assert(n->Opcode() == Op_LoopLimit ||
                n->Opcode() == Op_Opaque1   ||
-               n->Opcode() == Op_Opaque2, "unknown node type in macro list");
+               n->Opcode() == Op_Opaque2   ||
+               n->Opcode() == Op_Opaque3, "unknown node type in macro list");
       }
       assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count");
       progress = progress || success;
@@ -2502,6 +2505,30 @@
       } else if (n->Opcode() == Op_Opaque1 || n->Opcode() == Op_Opaque2) {
         _igvn.replace_node(n, n->in(1));
         success = true;
+#if INCLUDE_RTM_OPT
+      } else if ((n->Opcode() == Op_Opaque3) && ((Opaque3Node*)n)->rtm_opt()) {
+        assert(C->profile_rtm(), "should be used only in rtm deoptimization code");
+        assert((n->outcnt() == 1) && n->unique_out()->is_Cmp(), "");
+        Node* cmp = n->unique_out();
+#ifdef ASSERT
+        // Validate graph.
+        assert((cmp->outcnt() == 1) && cmp->unique_out()->is_Bool(), "");
+        BoolNode* bol = cmp->unique_out()->as_Bool();
+        assert((bol->outcnt() == 1) && bol->unique_out()->is_If() &&
+               (bol->_test._test == BoolTest::ne), "");
+        IfNode* ifn = bol->unique_out()->as_If();
+        assert((ifn->outcnt() == 2) &&
+               ifn->proj_out(1)->is_uncommon_trap_proj(Deoptimization::Reason_rtm_state_change), "");
+#endif
+        Node* repl = n->in(1);
+        if (!_has_locks) {
+          // Remove RTM state check if there are no locks in the code.
+          // Replace input to compare the same value.
+          repl = (cmp->in(1) == n) ? cmp->in(2) : cmp->in(1);
+        }
+        _igvn.replace_node(n, repl);
+        success = true;
+#endif
       }
       assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count");
       progress = progress || success;
--- a/src/share/vm/opto/macro.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/macro.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -76,6 +76,8 @@
   ProjNode *_memproj_catchall;
   ProjNode *_resproj;

+  // Additional data collected during macro expansion
+  bool _has_locks;

   void expand_allocate(AllocateNode *alloc);
   void expand_allocate_array(AllocateArrayNode *alloc);
@@ -118,7 +120,7 @@
                             Node* length);

 public:
-  PhaseMacroExpand(PhaseIterGVN &igvn) : Phase(Macro_Expand), _igvn(igvn) {
+  PhaseMacroExpand(PhaseIterGVN &igvn) : Phase(Macro_Expand), _igvn(igvn), _has_locks(false) {
     _igvn.set_delay_transform(true);
   }
   void eliminate_macro_nodes();
--- a/src/share/vm/opto/matcher.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/matcher.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1922,6 +1922,105 @@
   return OptoReg::as_OptoReg(regs.first());
 }

+// This function identifies sub-graphs in which a 'load' node is
+// input to two different nodes, and such that it can be matched
+// with BMI instructions like blsi, blsr, etc.
+// Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
+// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
+// refers to the same node.
+#ifdef X86
+// Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
+// This is a temporary solution until we make DAGs expressible in ADL.
+template<typename ConType>
+class FusedPatternMatcher {
+  Node* _op1_node;
+  Node* _mop_node;
+  int _con_op;
+
+  static int match_next(Node* n, int next_op, int next_op_idx) {
+    if (n->in(1) == NULL || n->in(2) == NULL) {
+      return -1;
+    }
+
+    if (next_op_idx == -1) { // n is commutative, try rotations
+      if (n->in(1)->Opcode() == next_op) {
+        return 1;
+      } else if (n->in(2)->Opcode() == next_op) {
+        return 2;
+      }
+    } else {
+      assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
+      if (n->in(next_op_idx)->Opcode() == next_op) {
+        return next_op_idx;
+      }
+    }
+    return -1;
+  }
+public:
+  FusedPatternMatcher(Node* op1_node, Node *mop_node, int con_op) :
+    _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
+
+  bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
+             int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
+             typename ConType::NativeType con_value) {
+    if (_op1_node->Opcode() != op1) {
+      return false;
+    }
+    if (_mop_node->outcnt() > 2) {
+      return false;
+    }
+    op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
+    if (op1_op2_idx == -1) {
+      return false;
+    }
+    // Memory operation must be the other edge
+    int op1_mop_idx = (op1_op2_idx & 1) + 1;
+
+    // Check that the mop node is really what we want
+    if (_op1_node->in(op1_mop_idx) == _mop_node) {
+      Node *op2_node = _op1_node->in(op1_op2_idx);
+      if (op2_node->outcnt() > 1) {
+        return false;
+      }
+      assert(op2_node->Opcode() == op2, "Should be");
+      op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
+      if (op2_con_idx == -1) {
+        return false;
+      }
+      // Memory operation must be the other edge
+      int op2_mop_idx = (op2_con_idx & 1) + 1;
+      // Check that the memory operation is the same node
+      if (op2_node->in(op2_mop_idx) == _mop_node) {
+        // Now check the constant
+        const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
+        if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+};
+
+
+bool Matcher::is_bmi_pattern(Node *n, Node *m) {
+  if (n != NULL && m != NULL) {
+    if (m->Opcode() == Op_LoadI) {
+      FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
+      return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
+             bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
+             bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
+    } else if (m->Opcode() == Op_LoadL) {
+      FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
+      return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
+             bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
+             bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
+    }
+  }
+  return false;
+}
+#endif // X86
+
 // A method-klass-holder may be passed in the inline_cache_reg
 // and then expanded into the inline_cache_reg and a method_oop register
 //   defined in ad_<arch>.cpp
@@ -1998,7 +2097,6 @@
       case Op_Catch:
       case Op_CatchProj:
       case Op_CProj:
-      case Op_FlagsProj:
       case Op_JumpProj:
       case Op_JProj:
       case Op_NeverBranch:
@@ -2078,6 +2176,14 @@
           set_shared(m->in(AddPNode::Base)->in(1));
         }

+        // if 'n' and 'm' are part of a graph for BMI instruction, clone this node.
+#ifdef X86
+        if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
+          mstack.push(m, Visit);
+          continue;
+        }
+#endif
+
         // Clone addressing expressions as they are "free" in memory access instructions
         if( mem_op && i == MemNode::Address && mop == Op_AddP ) {
           // Some inputs for address expression are not put on stack
--- a/src/share/vm/opto/matcher.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/matcher.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -79,6 +79,9 @@

   // Find shared Nodes, or Nodes that otherwise are Matcher roots
   void find_shared( Node *n );
+#ifdef X86
+  bool is_bmi_pattern(Node *n, Node *m);
+#endif

   // Debug and profile information for nodes in old space:
   GrowableArray<Node_Notes*>* _old_node_note_array;
@@ -340,10 +343,6 @@
   // Register for MODL projection of divmodL
   static RegMask modL_proj_mask();

-  static const RegMask mathExactI_result_proj_mask();
-  static const RegMask mathExactL_result_proj_mask();
-  static const RegMask mathExactI_flags_proj_mask();
-
   // Use hardware DIV instruction when it is faster than
   // a code which use multiply for division by constant.
   static bool use_asm_for_ldiv_by_con( jlong divisor );
--- a/src/share/vm/opto/mathexactnode.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/mathexactnode.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -31,358 +31,93 @@
 #include "opto/mathexactnode.hpp"
 #include "opto/subnode.hpp"

-MathExactNode::MathExactNode(Node* ctrl, Node* in1) : MultiNode(2) {
-  init_class_id(Class_MathExact);
-  init_req(0, ctrl);
-  init_req(1, in1);
-}
-
-MathExactNode::MathExactNode(Node* ctrl, Node* in1, Node* in2) : MultiNode(3) {
-  init_class_id(Class_MathExact);
-  init_req(0, ctrl);
-  init_req(1, in1);
-  init_req(2, in2);
-}
-
-BoolNode* MathExactNode::bool_node() const {
-  Node* flags = flags_node();
-  BoolNode* boolnode = flags->unique_out()->as_Bool();
-  assert(boolnode != NULL, "must have BoolNode");
-  return boolnode;
-}
-
-IfNode* MathExactNode::if_node() const {
-  BoolNode* boolnode = bool_node();
-  IfNode* ifnode = boolnode->unique_out()->as_If();
-  assert(ifnode != NULL, "must have IfNode");
-  return ifnode;
-}
-
-Node* MathExactNode::control_node() const {
-  IfNode* ifnode = if_node();
-  return ifnode->in(0);
-}
-
-Node* MathExactNode::non_throwing_branch() const {
-  IfNode* ifnode = if_node();
-  if (bool_node()->_test._test == BoolTest::overflow) {
-    return ifnode->proj_out(0);
-  }
-  return ifnode->proj_out(1);
-}
-
-// If the MathExactNode won't overflow we have to replace the
-// FlagsProjNode and ProjNode that is generated by the MathExactNode
-Node* MathExactNode::no_overflow(PhaseGVN* phase, Node* new_result) {
-  PhaseIterGVN* igvn = phase->is_IterGVN();
-  if (igvn) {
-    ProjNode* result = result_node();
-    ProjNode* flags = flags_node();
-
-    if (result != NULL) {
-      igvn->replace_node(result, new_result);
-    }
+template <typename OverflowOp>
+class AddHelper {
+public:
+  typedef typename OverflowOp::TypeClass TypeClass;
+  typedef typename TypeClass::NativeType NativeType;

-    if (flags != NULL) {
-      BoolNode* boolnode = bool_node();
-      switch (boolnode->_test._test) {
-        case BoolTest::overflow:
-          // if the check is for overflow - never taken
-          igvn->replace_node(boolnode, phase->intcon(0));
-          break;
-        case BoolTest::no_overflow:
-          // if the check is for no overflow - always taken
-          igvn->replace_node(boolnode, phase->intcon(1));
-          break;
-        default:
-          fatal("Unexpected value of BoolTest");
-          break;
-      }
-      flags->del_req(0);
+  static bool will_overflow(NativeType value1, NativeType value2) {
+    NativeType result = value1 + value2;
+    // Hacker's Delight 2-12 Overflow if both arguments have the opposite sign of the result
+    if (((value1 ^ result) & (value2 ^ result)) >= 0) {
+      return false;
     }
-  }
-  return new_result;
-}
-
-Node* MathExactINode::match(const ProjNode* proj, const Matcher* m) {
-  uint ideal_reg = proj->ideal_reg();
-  RegMask rm;
-  if (proj->_con == result_proj_node) {
-    rm = m->mathExactI_result_proj_mask();
-  } else {
-    assert(proj->_con == flags_proj_node, "must be result or flags");
-    assert(ideal_reg == Op_RegFlags, "sanity");
-    rm = m->mathExactI_flags_proj_mask();
-  }
-  return new (m->C) MachProjNode(this, proj->_con, rm, ideal_reg);
-}
-
-Node* MathExactLNode::match(const ProjNode* proj, const Matcher* m) {
-  uint ideal_reg = proj->ideal_reg();
-  RegMask rm;
-  if (proj->_con == result_proj_node) {
-    rm = m->mathExactL_result_proj_mask();
-  } else {
-    assert(proj->_con == flags_proj_node, "must be result or flags");
-    assert(ideal_reg == Op_RegFlags, "sanity");
-    rm = m->mathExactI_flags_proj_mask();
-  }
-  return new (m->C) MachProjNode(this, proj->_con, rm, ideal_reg);
-}
-
-Node* AddExactINode::Ideal(PhaseGVN* phase, bool can_reshape) {
-  Node* arg1 = in(1);
-  Node* arg2 = in(2);
-
-  const Type* type1 = phase->type(arg1);
-  const Type* type2 = phase->type(arg2);
-
-  if (type1 != Type::TOP && type1->singleton() &&
-      type2 != Type::TOP && type2->singleton()) {
-    jint val1 = arg1->get_int();
-    jint val2 = arg2->get_int();
-    jint result = val1 + val2;
-    // Hacker's Delight 2-12 Overflow if both arguments have the opposite sign of the result
-    if ( (((val1 ^ result) & (val2 ^ result)) >= 0)) {
-      Node* con_result = ConINode::make(phase->C, result);
-      return no_overflow(phase, con_result);
-    }
-    return NULL;
+    return true;
   }

-  if (type1 == TypeInt::ZERO || type2 == TypeInt::ZERO) { // (Add 0 x) == x
-    Node* add_result = new (phase->C) AddINode(arg1, arg2);
-    return no_overflow(phase, add_result);
-  }
-
-  if (type2->singleton()) {
-    return NULL; // no change - keep constant on the right
+  static bool can_overflow(const Type* type1, const Type* type2) {
+    if (type1 == TypeClass::ZERO || type2 == TypeClass::ZERO) {
+      return false;
+    }
+    return true;
   }
-
-  if (type1->singleton()) {
-    // Make it x + Constant - move constant to the right
-    swap_edges(1, 2);
-    return this;
-  }
-
-  if (arg2->is_Load()) {
-    return NULL; // no change - keep load on the right
-  }
-
-  if (arg1->is_Load()) {
-    // Make it x + Load - move load to the right
-    swap_edges(1, 2);
-    return this;
-  }
+};

-  if (arg1->_idx > arg2->_idx) {
-    // Sort the edges
-    swap_edges(1, 2);
-    return this;
-  }
-
-  return NULL;
-}
-
-Node* AddExactLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
-  Node* arg1 = in(1);
-  Node* arg2 = in(2);
+template <typename OverflowOp>
+class SubHelper {
+public:
+  typedef typename OverflowOp::TypeClass TypeClass;
+  typedef typename TypeClass::NativeType NativeType;

-  const Type* type1 = phase->type(arg1);
-  const Type* type2 = phase->type(arg2);
-
-  if (type1 != Type::TOP && type1->singleton() &&
-      type2 != Type::TOP && type2->singleton()) {
-    jlong val1 = arg1->get_long();
-    jlong val2 = arg2->get_long();
-    jlong result = val1 + val2;
-    // Hacker's Delight 2-12 Overflow if both arguments have the opposite sign of the result
-    if ( (((val1 ^ result) & (val2 ^ result)) >= 0)) {
-      Node* con_result = ConLNode::make(phase->C, result);
-      return no_overflow(phase, con_result);
+  static bool will_overflow(NativeType value1, NativeType value2) {
+    NativeType result = value1 - value2;
+    // hacker's delight 2-12 overflow iff the arguments have different signs and
+    // the sign of the result is different than the sign of arg1
+    if (((value1 ^ value2) & (value1 ^ result)) >= 0) {
+      return false;
     }
-    return NULL;
+    return true;
   }

-  if (type1 == TypeLong::ZERO || type2 == TypeLong::ZERO) { // (Add 0 x) == x
-    Node* add_result = new (phase->C) AddLNode(arg1, arg2);
-    return no_overflow(phase, add_result);
+  static bool can_overflow(const Type* type1, const Type* type2) {
+    if (type2 == TypeClass::ZERO) {
+      return false;
+    }
+    return true;
   }
-
-  if (type2->singleton()) {
-    return NULL; // no change - keep constant on the right
-  }
-
-  if (type1->singleton()) {
-    // Make it x + Constant - move constant to the right
-    swap_edges(1, 2);
-    return this;
-  }
+};

-  if (arg2->is_Load()) {
-    return NULL; // no change - keep load on the right
-  }
-
-  if (arg1->is_Load()) {
-    // Make it x + Load - move load to the right
-    swap_edges(1, 2);
-    return this;
-  }
-
-  if (arg1->_idx > arg2->_idx) {
-    // Sort the edges
-    swap_edges(1, 2);
-    return this;
-  }
+template <typename OverflowOp>
+class MulHelper {
+public:
+  typedef typename OverflowOp::TypeClass TypeClass;

-  return NULL;
-}
-
-Node* SubExactINode::Ideal(PhaseGVN* phase, bool can_reshape) {
-  Node* arg1 = in(1);
-  Node* arg2 = in(2);
-
-  const Type* type1 = phase->type(arg1);
-  const Type* type2 = phase->type(arg2);
-
-  if (type1 != Type::TOP && type1->singleton() &&
-      type2 != Type::TOP && type2->singleton()) {
-    jint val1 = arg1->get_int();
-    jint val2 = arg2->get_int();
-    jint result = val1 - val2;
+  static bool can_overflow(const Type* type1, const Type* type2) {
+    if (type1 == TypeClass::ZERO || type2 == TypeClass::ZERO) {
+      return false;
+    } else if (type1 == TypeClass::ONE || type2 == TypeClass::ONE) {
+      return false;
+    }
+    return true;
+  }
+};

-    // Hacker's Delight 2-12 Overflow iff the arguments have different signs and
-    // the sign of the result is different than the sign of arg1
-    if (((val1 ^ val2) & (val1 ^ result)) >= 0) {
-      Node* con_result = ConINode::make(phase->C, result);
-      return no_overflow(phase, con_result);
-    }
-    return NULL;
-  }
-
-  if (type1 == TypeInt::ZERO || type2 == TypeInt::ZERO) {
-    // Sub with zero is the same as add with zero
-    Node* add_result = new (phase->C) AddINode(arg1, arg2);
-    return no_overflow(phase, add_result);
-  }
-
-  return NULL;
+bool OverflowAddINode::will_overflow(jint v1, jint v2) const {
+  return AddHelper<OverflowAddINode>::will_overflow(v1, v2);
 }

-Node* SubExactLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
-  Node* arg1 = in(1);
-  Node* arg2 = in(2);
-
-  const Type* type1 = phase->type(arg1);
-  const Type* type2 = phase->type(arg2);
-
-  if (type1 != Type::TOP && type1->singleton() &&
-      type2 != Type::TOP && type2->singleton()) {
-    jlong val1 = arg1->get_long();
-    jlong val2 = arg2->get_long();
-    jlong result = val1 - val2;
-
-    // Hacker's Delight 2-12 Overflow iff the arguments have different signs and
-    // the sign of the result is different than the sign of arg1
-    if (((val1 ^ val2) & (val1 ^ result)) >= 0) {
-      Node* con_result = ConLNode::make(phase->C, result);
-      return no_overflow(phase, con_result);
-    }
-    return NULL;
-  }
-
-  if (type1 == TypeLong::ZERO || type2 == TypeLong::ZERO) {
-    // Sub with zero is the same as add with zero
-    Node* add_result = new (phase->C) AddLNode(arg1, arg2);
-    return no_overflow(phase, add_result);
-  }
-
-  return NULL;
-}
-
-Node* NegExactINode::Ideal(PhaseGVN* phase, bool can_reshape) {
-  Node *arg = in(1);
-
-  const Type* type = phase->type(arg);
-  if (type != Type::TOP && type->singleton()) {
-    jint value = arg->get_int();
-    if (value != min_jint) {
-      Node* neg_result = ConINode::make(phase->C, -value);
-      return no_overflow(phase, neg_result);
-    }
-  }
-  return NULL;
+bool OverflowSubINode::will_overflow(jint v1, jint v2) const {
+  return SubHelper<OverflowSubINode>::will_overflow(v1, v2);
 }

-Node* NegExactLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
-  Node *arg = in(1);
-
-  const Type* type = phase->type(arg);
-  if (type != Type::TOP && type->singleton()) {
-    jlong value = arg->get_long();
-    if (value != min_jlong) {
-      Node* neg_result = ConLNode::make(phase->C, -value);
-      return no_overflow(phase, neg_result);
+bool OverflowMulINode::will_overflow(jint v1, jint v2) const {
+    jlong result = (jlong) v1 * (jlong) v2;
+    if ((jint) result == result) {
+      return false;
     }
-  }
-  return NULL;
+    return true;
 }

-Node* MulExactINode::Ideal(PhaseGVN* phase, bool can_reshape) {
-  Node* arg1 = in(1);
-  Node* arg2 = in(2);
-
-  const Type* type1 = phase->type(arg1);
-  const Type* type2 = phase->type(arg2);
-
-  if (type1 != Type::TOP && type1->singleton() &&
-      type2 != Type::TOP && type2->singleton()) {
-    jint val1 = arg1->get_int();
-    jint val2 = arg2->get_int();
-    jlong result = (jlong) val1 * (jlong) val2;
-    if ((jint) result == result) {
-      // no overflow
-      Node* mul_result = ConINode::make(phase->C, result);
-      return no_overflow(phase, mul_result);
-    }
-  }
-
-  if (type1 == TypeInt::ZERO || type2 == TypeInt::ZERO) {
-    return no_overflow(phase, ConINode::make(phase->C, 0));
-  }
-
-  if (type1 == TypeInt::ONE) {
-    Node* mul_result = new (phase->C) AddINode(arg2, phase->intcon(0));
-    return no_overflow(phase, mul_result);
-  }
-  if (type2 == TypeInt::ONE) {
-    Node* mul_result = new (phase->C) AddINode(arg1, phase->intcon(0));
-    return no_overflow(phase, mul_result);
-  }
-
-  if (type1 == TypeInt::MINUS_1) {
-    return new (phase->C) NegExactINode(NULL, arg2);
-  }
-
-  if (type2 == TypeInt::MINUS_1) {
-    return new (phase->C) NegExactINode(NULL, arg1);
-  }
-
-  return NULL;
+bool OverflowAddLNode::will_overflow(jlong v1, jlong v2) const {
+  return AddHelper<OverflowAddLNode>::will_overflow(v1, v2);
 }

-Node* MulExactLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
-  Node* arg1 = in(1);
-  Node* arg2 = in(2);
+bool OverflowSubLNode::will_overflow(jlong v1, jlong v2) const {
+  return SubHelper<OverflowSubLNode>::will_overflow(v1, v2);
+}

-  const Type* type1 = phase->type(arg1);
-  const Type* type2 = phase->type(arg2);
-
-  if (type1 != Type::TOP && type1->singleton() &&
-      type2 != Type::TOP && type2->singleton()) {
-    jlong val1 = arg1->get_long();
-    jlong val2 = arg2->get_long();
-
+bool OverflowMulLNode::will_overflow(jlong val1, jlong val2) const {
     jlong result = val1 * val2;
     jlong ax = (val1 < 0 ? -val1 : val1);
     jlong ay = (val2 < 0 ? -val2 : val2);
@@ -398,33 +133,125 @@
       }
     }

-    if (!overflow) {
-      Node* mul_result = ConLNode::make(phase->C, result);
-      return no_overflow(phase, mul_result);
+    return overflow;
+}
+
+bool OverflowAddINode::can_overflow(const Type* t1, const Type* t2) const {
+  return AddHelper<OverflowAddINode>::can_overflow(t1, t2);
+}
+
+bool OverflowSubINode::can_overflow(const Type* t1, const Type* t2) const {
+  if (in(1) == in(2)) {
+    return false;
+  }
+  return SubHelper<OverflowSubINode>::can_overflow(t1, t2);
+}
+
+bool OverflowMulINode::can_overflow(const Type* t1, const Type* t2) const {
+  return MulHelper<OverflowMulINode>::can_overflow(t1, t2);
+}
+
+bool OverflowAddLNode::can_overflow(const Type* t1, const Type* t2) const {
+  return AddHelper<OverflowAddLNode>::can_overflow(t1, t2);
+}
+
+bool OverflowSubLNode::can_overflow(const Type* t1, const Type* t2) const {
+  if (in(1) == in(2)) {
+    return false;
+  }
+  return SubHelper<OverflowSubLNode>::can_overflow(t1, t2);
+}
+
+bool OverflowMulLNode::can_overflow(const Type* t1, const Type* t2) const {
+  return MulHelper<OverflowMulLNode>::can_overflow(t1, t2);
+}
+
+const Type* OverflowNode::sub(const Type* t1, const Type* t2) const {
+  fatal(err_msg_res("sub() should not be called for '%s'", NodeClassNames[this->Opcode()]));
+  return TypeInt::CC;
+}
+
+template <typename OverflowOp>
+struct IdealHelper {
+  typedef typename OverflowOp::TypeClass TypeClass; // TypeInt, TypeLong
+  typedef typename TypeClass::NativeType NativeType;
+
+  static Node* Ideal(const OverflowOp* node, PhaseGVN* phase, bool can_reshape) {
+    Node* arg1 = node->in(1);
+    Node* arg2 = node->in(2);
+    const Type* type1 = phase->type(arg1);
+    const Type* type2 = phase->type(arg2);
+
+    if (type1 == NULL || type2 == NULL) {
+      return NULL;
     }
-  }

-  if (type1 == TypeLong::ZERO || type2 == TypeLong::ZERO) {
-    return no_overflow(phase, ConLNode::make(phase->C, 0));
+    if (type1 != Type::TOP && type1->singleton() &&
+        type2 != Type::TOP && type2->singleton()) {
+      NativeType val1 = TypeClass::as_self(type1)->get_con();
+      NativeType val2 = TypeClass::as_self(type2)->get_con();
+      if (node->will_overflow(val1, val2) == false) {
+        Node* con_result = ConINode::make(phase->C, 0);
+        return con_result;
+      }
+      return NULL;
+    }
+    return NULL;
   }

-  if (type1 == TypeLong::ONE) {
-    Node* mul_result = new (phase->C) AddLNode(arg2, phase->longcon(0));
-    return no_overflow(phase, mul_result);
-  }
-  if (type2 == TypeLong::ONE) {
-    Node* mul_result = new (phase->C) AddLNode(arg1, phase->longcon(0));
-    return no_overflow(phase, mul_result);
-  }
+  static const Type* Value(const OverflowOp* node, PhaseTransform* phase) {
+    const Type *t1 = phase->type( node->in(1) );
+    const Type *t2 = phase->type( node->in(2) );
+    if( t1 == Type::TOP ) return Type::TOP;
+    if( t2 == Type::TOP ) return Type::TOP;
+
+    const TypeClass* i1 = TypeClass::as_self(t1);
+    const TypeClass* i2 = TypeClass::as_self(t2);
+
+    if (i1 == NULL || i2 == NULL) {
+      return TypeInt::CC;
+    }

-  if (type1 == TypeLong::MINUS_1) {
-    return new (phase->C) NegExactLNode(NULL, arg2);
-  }
+    if (t1->singleton() && t2->singleton()) {
+      NativeType val1 = i1->get_con();
+      NativeType val2 = i2->get_con();
+      if (node->will_overflow(val1, val2)) {
+        return TypeInt::CC;
+      }
+      return TypeInt::ZERO;
+    } else if (i1 != TypeClass::TYPE_DOMAIN && i2 != TypeClass::TYPE_DOMAIN) {
+      if (node->will_overflow(i1->_lo, i2->_lo)) {
+        return TypeInt::CC;
+      } else if (node->will_overflow(i1->_lo, i2->_hi)) {
+        return TypeInt::CC;
+      } else if (node->will_overflow(i1->_hi, i2->_lo)) {
+        return TypeInt::CC;
+      } else if (node->will_overflow(i1->_hi, i2->_hi)) {
+        return TypeInt::CC;
+      }
+      return TypeInt::ZERO;
+    }

-  if (type2 == TypeLong::MINUS_1) {
-    return new (phase->C) NegExactLNode(NULL, arg1);
+    if (!node->can_overflow(t1, t2)) {
+      return TypeInt::ZERO;
+    }
+    return TypeInt::CC;
   }
+};

-  return NULL;
+Node* OverflowINode::Ideal(PhaseGVN* phase, bool can_reshape) {
+  return IdealHelper<OverflowINode>::Ideal(this, phase, can_reshape);
 }

+Node* OverflowLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
+  return IdealHelper<OverflowLNode>::Ideal(this, phase, can_reshape);
+}
+
+const Type* OverflowINode::Value(PhaseTransform* phase) const {
+  return IdealHelper<OverflowINode>::Value(this, phase);
+}
+
+const Type* OverflowLNode::Value(PhaseTransform* phase) const {
+  return IdealHelper<OverflowLNode>::Value(this, phase);
+}
+
--- a/src/share/vm/opto/mathexactnode.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/mathexactnode.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -27,128 +27,111 @@

 #include "opto/multnode.hpp"
 #include "opto/node.hpp"
+#include "opto/addnode.hpp"
 #include "opto/subnode.hpp"
 #include "opto/type.hpp"

-class BoolNode;
-class IfNode;
-class Node;
-
 class PhaseGVN;
 class PhaseTransform;

-class MathExactNode : public MultiNode {
+class OverflowNode : public CmpNode {
 public:
-  MathExactNode(Node* ctrl, Node* in1);
-  MathExactNode(Node* ctrl, Node* in1, Node* in2);
-  enum {
-    result_proj_node = 0,
-    flags_proj_node = 1
-  };
-  virtual int Opcode() const;
-  virtual Node* Identity(PhaseTransform* phase) { return this; }
-  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape) { return NULL; }
-  virtual const Type* Value(PhaseTransform* phase) const { return bottom_type(); }
-  virtual uint hash() const { return NO_HASH; }
-  virtual bool is_CFG() const { return false; }
-  virtual uint ideal_reg() const { return NotAMachineReg; }
+  OverflowNode(Node* in1, Node* in2) : CmpNode(in1, in2) {}

-  ProjNode* result_node() const { return proj_out(result_proj_node); }
-  ProjNode* flags_node() const { return proj_out(flags_proj_node); }
-  Node* control_node() const;
-  Node* non_throwing_branch() const;
-protected:
-  IfNode* if_node() const;
-  BoolNode* bool_node() const;
-  Node* no_overflow(PhaseGVN *phase, Node* new_result);
-};
-
-class MathExactINode : public MathExactNode {
- public:
-  MathExactINode(Node* ctrl, Node* in1) : MathExactNode(ctrl, in1) {}
-  MathExactINode(Node* ctrl, Node* in1, Node* in2) : MathExactNode(ctrl, in1, in2) {}
-  virtual int Opcode() const;
-  virtual Node* match(const ProjNode* proj, const Matcher* m);
-  virtual const Type* bottom_type() const { return TypeTuple::INT_CC_PAIR; }
-};
-
-class MathExactLNode : public MathExactNode {
-public:
-  MathExactLNode(Node* ctrl, Node* in1) : MathExactNode(ctrl, in1) {}
-  MathExactLNode(Node* ctrl, Node* in1, Node* in2) : MathExactNode(ctrl, in1, in2) {}
-  virtual int Opcode() const;
-  virtual Node* match(const ProjNode* proj, const Matcher* m);
-  virtual const Type* bottom_type() const { return TypeTuple::LONG_CC_PAIR; }
-};
-
-class AddExactINode : public MathExactINode {
-public:
-  AddExactINode(Node* ctrl, Node* in1, Node* in2) : MathExactINode(ctrl, in1, in2) {}
-  virtual int Opcode() const;
-  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+  virtual uint ideal_reg() const { return Op_RegFlags; }
+  virtual const Type* sub(const Type* t1, const Type* t2) const;
 };

-class AddExactLNode : public MathExactLNode {
-public:
-  AddExactLNode(Node* ctrl, Node* in1, Node* in2) : MathExactLNode(ctrl, in1, in2) {}
-  virtual int Opcode() const;
-  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
-};
-
-class SubExactINode : public MathExactINode {
-public:
-  SubExactINode(Node* ctrl, Node* in1, Node* in2) : MathExactINode(ctrl, in1, in2) {}
-  virtual int Opcode() const;
-  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
-};
-
-class SubExactLNode : public MathExactLNode {
-public:
-  SubExactLNode(Node* ctrl, Node* in1, Node* in2) : MathExactLNode(ctrl, in1, in2) {}
-  virtual int Opcode() const;
-  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
-};
-
-class NegExactINode : public MathExactINode {
-public:
-  NegExactINode(Node* ctrl, Node* in1) : MathExactINode(ctrl, in1) {}
-  virtual int Opcode() const;
-  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
-};
-
-class NegExactLNode : public MathExactLNode {
+class OverflowINode : public OverflowNode {
 public:
-  NegExactLNode(Node* ctrl, Node* in1) : MathExactLNode(ctrl, in1) {}
-  virtual int Opcode() const;
-  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
-};
-
-class MulExactINode : public MathExactINode {
-public:
-  MulExactINode(Node* ctrl, Node* in1, Node* in2) : MathExactINode(ctrl, in1, in2) {}
-  virtual int Opcode() const;
-  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
-};
+  typedef TypeInt TypeClass;

-class MulExactLNode : public MathExactLNode {
-public:
-  MulExactLNode(Node* ctrl, Node* in1, Node* in2) : MathExactLNode(ctrl, in1, in2) {}
-  virtual int Opcode() const;
+  OverflowINode(Node* in1, Node* in2) : OverflowNode(in1, in2) {}
   virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
-};
+  virtual const Type* Value(PhaseTransform* phase) const;

-class FlagsProjNode : public ProjNode {
-public:
-  FlagsProjNode(Node* src, uint con) : ProjNode(src, con) {
-    init_class_id(Class_FlagsProj);
-  }
-
-  virtual int Opcode() const;
-  virtual bool is_CFG() const { return false; }
-  virtual const Type* bottom_type() const { return TypeInt::CC; }
-  virtual uint ideal_reg() const { return Op_RegFlags; }
+  virtual bool will_overflow(jint v1, jint v2) const = 0;
+  virtual bool can_overflow(const Type* t1, const Type* t2) const = 0;
 };


+class OverflowLNode : public OverflowNode {
+public:
+  typedef TypeLong TypeClass;
+
+  OverflowLNode(Node* in1, Node* in2) : OverflowNode(in1, in2) {}
+  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
+  virtual const Type* Value(PhaseTransform* phase) const;
+
+  virtual bool will_overflow(jlong v1, jlong v2) const = 0;
+  virtual bool can_overflow(const Type* t1, const Type* t2) const = 0;
+};
+
+class OverflowAddINode : public OverflowINode {
+public:
+  typedef AddINode MathOp;
+
+  OverflowAddINode(Node* in1, Node* in2) : OverflowINode(in1, in2) {}
+  virtual int Opcode() const;
+
+  virtual bool will_overflow(jint v1, jint v2) const;
+  virtual bool can_overflow(const Type* t1, const Type* t2) const;
+};
+
+class OverflowSubINode : public OverflowINode {
+public:
+  typedef SubINode MathOp;
+
+  OverflowSubINode(Node* in1, Node* in2) : OverflowINode(in1, in2) {}
+  virtual int Opcode() const;
+
+  virtual bool will_overflow(jint v1, jint v2) const;
+  virtual bool can_overflow(const Type* t1, const Type* t2) const;
+};
+
+class OverflowMulINode : public OverflowINode {
+public:
+  typedef MulINode MathOp;
+
+  OverflowMulINode(Node* in1, Node* in2) : OverflowINode(in1, in2) {}
+  virtual int Opcode() const;
+
+  virtual bool will_overflow(jint v1, jint v2) const;
+  virtual bool can_overflow(const Type* t1, const Type* t2) const;
+};
+
+class OverflowAddLNode : public OverflowLNode {
+public:
+  typedef AddLNode MathOp;
+
+  OverflowAddLNode(Node* in1, Node* in2) : OverflowLNode(in1, in2) {}
+  virtual int Opcode() const;
+
+  virtual bool will_overflow(jlong v1, jlong v2) const;
+  virtual bool can_overflow(const Type* t1, const Type* t2) const;
+};
+
+class OverflowSubLNode : public OverflowLNode {
+public:
+  typedef SubLNode MathOp;
+
+  OverflowSubLNode(Node* in1, Node* in2) : OverflowLNode(in1, in2) {}
+  virtual int Opcode() const;
+
+  virtual bool will_overflow(jlong v1, jlong v2) const;
+  virtual bool can_overflow(const Type* t1, const Type* t2) const;
+};
+
+class OverflowMulLNode : public OverflowLNode {
+public:
+  typedef MulLNode MathOp;
+
+  OverflowMulLNode(Node* in1, Node* in2) : OverflowLNode(in1, in2) {}
+  virtual int Opcode() const;
+
+  virtual bool will_overflow(jlong v1, jlong v2) const;
+  virtual bool can_overflow(const Type* t1, const Type* t2) const;
+};
+
 #endif
--- a/src/share/vm/opto/multnode.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/multnode.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -54,11 +54,6 @@
         assert(Opcode() != Op_If || proj->Opcode() == (which_proj?Op_IfTrue:Op_IfFalse), "bad if #2");
         return proj;
       }
-    } else if (p->is_FlagsProj()) {
-      FlagsProjNode *proj = p->as_FlagsProj();
-      if (proj->_con == which_proj) {
-        return proj;
-      }
     } else {
       assert(p == this && this->is_Start(), "else must be proj");
       continue;
--- a/src/share/vm/opto/node.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/node.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -69,7 +69,6 @@
 class EncodePKlassNode;
 class FastLockNode;
 class FastUnlockNode;
-class FlagsProjNode;
 class IfNode;
 class IfFalseNode;
 class IfTrueNode;
@@ -100,7 +99,6 @@
 class MachSpillCopyNode;
 class MachTempNode;
 class Matcher;
-class MathExactNode;
 class MemBarNode;
 class MemBarStoreStoreNode;
 class MemNode;
@@ -575,7 +573,6 @@
       DEFINE_CLASS_ID(MemBar,      Multi, 3)
         DEFINE_CLASS_ID(Initialize,       MemBar, 0)
         DEFINE_CLASS_ID(MemBarStoreStore, MemBar, 1)
-      DEFINE_CLASS_ID(MathExact,   Multi, 4)

     DEFINE_CLASS_ID(Mach,  Node, 1)
       DEFINE_CLASS_ID(MachReturn, Mach, 0)
@@ -632,7 +629,6 @@
       DEFINE_CLASS_ID(Cmp,   Sub, 0)
         DEFINE_CLASS_ID(FastLock,   Cmp, 0)
         DEFINE_CLASS_ID(FastUnlock, Cmp, 1)
-        DEFINE_CLASS_ID(FlagsProj, Cmp, 2)

     DEFINE_CLASS_ID(MergeMem, Node, 7)
     DEFINE_CLASS_ID(Bool,     Node, 8)
@@ -736,7 +732,6 @@
   DEFINE_CLASS_QUERY(EncodePKlass)
   DEFINE_CLASS_QUERY(FastLock)
   DEFINE_CLASS_QUERY(FastUnlock)
-  DEFINE_CLASS_QUERY(FlagsProj)
   DEFINE_CLASS_QUERY(If)
   DEFINE_CLASS_QUERY(IfFalse)
   DEFINE_CLASS_QUERY(IfTrue)
@@ -765,7 +760,6 @@
   DEFINE_CLASS_QUERY(MachSafePoint)
   DEFINE_CLASS_QUERY(MachSpillCopy)
   DEFINE_CLASS_QUERY(MachTemp)
-  DEFINE_CLASS_QUERY(MathExact)
   DEFINE_CLASS_QUERY(Mem)
   DEFINE_CLASS_QUERY(MemBar)
   DEFINE_CLASS_QUERY(MemBarStoreStore)
--- a/src/share/vm/opto/parse.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/parse.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -480,6 +480,8 @@
   // Helper function to compute array addressing
   Node* array_addressing(BasicType type, int vals, const Type* *result2=NULL);

+  void rtm_deopt();
+
   // Pass current map to exits
   void return_current(Node* value);
--- a/src/share/vm/opto/parse1.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/parse1.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -565,6 +565,10 @@
     set_map(entry_map);
     do_method_entry();
   }
+  if (depth() == 1) {
+    // Add check to deoptimize the nmethod if RTM state was changed
+    rtm_deopt();
+  }

   // Check for bailouts during method entry.
   if (failing()) {
@@ -1982,6 +1986,42 @@
   set_control( _gvn.transform(result_rgn) );
 }

+// Add check to deoptimize if RTM state is not ProfileRTM
+void Parse::rtm_deopt() {
+#if INCLUDE_RTM_OPT
+  if (C->profile_rtm()) {
+    assert(C->method() != NULL, "only for normal compilations");
+    assert(!C->method()->method_data()->is_empty(), "MDO is needed to record RTM state");
+    assert(depth() == 1, "generate check only for main compiled method");
+
+    // Set starting bci for uncommon trap.
+    set_parse_bci(is_osr_parse() ? osr_bci() : 0);
+
+    // Load the rtm_state from the MethodData.
+    const TypePtr* adr_type = TypeMetadataPtr::make(C->method()->method_data());
+    Node* mdo = makecon(adr_type);
+    int offset = MethodData::rtm_state_offset_in_bytes();
+    Node* adr_node = basic_plus_adr(mdo, mdo, offset);
+    Node* rtm_state = make_load(control(), adr_node, TypeInt::INT, T_INT, adr_type, MemNode::unordered);
+
+    // Separate Load from Cmp by Opaque.
+    // In expand_macro_nodes() it will be replaced either
+    // with this load when there are locks in the code
+    // or with ProfileRTM (cmp->in(2)) otherwise so that
+    // the check will fold.
+    Node* profile_state = makecon(TypeInt::make(ProfileRTM));
+    Node* opq   = _gvn.transform( new (C) Opaque3Node(C, rtm_state, Opaque3Node::RTM_OPT) );
+    Node* chk   = _gvn.transform( new (C) CmpINode(opq, profile_state) );
+    Node* tst   = _gvn.transform( new (C) BoolNode(chk, BoolTest::eq) );
+    // Branch to failure if state was changed
+    { BuildCutout unless(this, tst, PROB_ALWAYS);
+      uncommon_trap(Deoptimization::Reason_rtm_state_change,
+                    Deoptimization::Action_make_not_entrant);
+    }
+  }
+#endif
+}
+
 //------------------------------return_current---------------------------------
 // Append current _map to _exit_return
 void Parse::return_current(Node* value) {
--- a/src/share/vm/opto/runtime.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/runtime.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1311,6 +1311,14 @@
         tty->print_cr("%s", c->name());
         blc->print_on(tty);
       }
+#if INCLUDE_RTM_OPT
+    } else if (c->tag() == NamedCounter::RTMLockingCounter) {
+      RTMLockingCounters* rlc = ((RTMLockingNamedCounter*)c)->counters();
+      if (rlc->nonzero()) {
+        tty->print_cr("%s", c->name());
+        rlc->print_on(tty);
+      }
+#endif
     }
     c = c->next();
   }
@@ -1350,6 +1358,8 @@
   NamedCounter* c;
   if (tag == NamedCounter::BiasedLockingCounter) {
     c = new BiasedLockingNamedCounter(strdup(st.as_string()));
+  } else if (tag == NamedCounter::RTMLockingCounter) {
+    c = new RTMLockingNamedCounter(strdup(st.as_string()));
   } else {
     c = new NamedCounter(strdup(st.as_string()), tag);
   }
@@ -1358,6 +1368,7 @@
   // add counters so this is safe.
   NamedCounter* head;
   do {
+    c->set_next(NULL);
     head = _named_counters;
     c->set_next(head);
   } while (Atomic::cmpxchg_ptr(c, &_named_counters, head) != head);
--- a/src/share/vm/opto/runtime.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/runtime.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -29,6 +29,7 @@
 #include "opto/machnode.hpp"
 #include "opto/type.hpp"
 #include "runtime/biasedLocking.hpp"
+#include "runtime/rtmLocking.hpp"
 #include "runtime/deoptimization.hpp"
 #include "runtime/vframe.hpp"

@@ -61,7 +62,8 @@
     NoTag,
     LockCounter,
     EliminatedLockCounter,
-    BiasedLockingCounter
+    BiasedLockingCounter,
+    RTMLockingCounter
   };

 private:
@@ -85,7 +87,7 @@

   NamedCounter* next() const    { return _next; }
   void set_next(NamedCounter* next) {
-    assert(_next == NULL, "already set");
+    assert(_next == NULL || next == NULL, "already set");
     _next = next;
   }

@@ -102,6 +104,18 @@
   BiasedLockingCounters* counters() { return &_counters; }
 };

+
+class RTMLockingNamedCounter : public NamedCounter {
+ private:
+ RTMLockingCounters _counters;
+
+ public:
+  RTMLockingNamedCounter(const char *n) :
+    NamedCounter(n, RTMLockingCounter), _counters() {}
+
+  RTMLockingCounters* counters() { return &_counters; }
+};
+
 typedef const TypeFunc*(*TypeFunc_generator)();

 class OptoRuntime : public AllStatic {
--- a/src/share/vm/opto/subnode.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/subnode.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1126,11 +1126,15 @@
   Node *cmp = in(1);
   if( !cmp->is_Sub() ) return NULL;
   int cop = cmp->Opcode();
-  if( cop == Op_FastLock || cop == Op_FastUnlock || cop == Op_FlagsProj) return NULL;
+  if( cop == Op_FastLock || cop == Op_FastUnlock) return NULL;
   Node *cmp1 = cmp->in(1);
   Node *cmp2 = cmp->in(2);
   if( !cmp1 ) return NULL;

+  if (_test._test == BoolTest::overflow || _test._test == BoolTest::no_overflow) {
+    return NULL;
+  }
+
   // Constant on left?
   Node *con = cmp1;
   uint op2 = cmp2->Opcode();
--- a/src/share/vm/opto/type.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/type.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -306,6 +306,7 @@
   TypeInt::POS1    = TypeInt::make(1,max_jint,   WidenMin); // Positive values
   TypeInt::INT     = TypeInt::make(min_jint,max_jint, WidenMax); // 32-bit integers
   TypeInt::SYMINT  = TypeInt::make(-max_jint,max_jint,WidenMin); // symmetric range
+  TypeInt::TYPE_DOMAIN  = TypeInt::INT;
   // CmpL is overloaded both as the bytecode computation returning
   // a trinary (-1,0,+1) integer result AND as an efficient long
   // compare returning optimizer ideal-type flags.
@@ -322,6 +323,7 @@
   TypeLong::LONG    = TypeLong::make(min_jlong,max_jlong,WidenMax); // 64-bit integers
   TypeLong::INT     = TypeLong::make((jlong)min_jint,(jlong)max_jint,WidenMin);
   TypeLong::UINT    = TypeLong::make(0,(jlong)max_juint,WidenMin);
+  TypeLong::TYPE_DOMAIN  = TypeLong::LONG;

   const Type **fboth =(const Type**)shared_type_arena->Amalloc_4(2*sizeof(Type*));
   fboth[0] = Type::CONTROL;
@@ -1161,6 +1163,7 @@
 const TypeInt *TypeInt::POS1;   // Positive 32-bit integers
 const TypeInt *TypeInt::INT;    // 32-bit integers
 const TypeInt *TypeInt::SYMINT; // symmetric range [-max_jint..max_jint]
+const TypeInt *TypeInt::TYPE_DOMAIN; // alias for TypeInt::INT

 //------------------------------TypeInt----------------------------------------
 TypeInt::TypeInt( jint lo, jint hi, int w ) : Type(Int), _lo(lo), _hi(hi), _widen(w) {
@@ -1418,6 +1421,7 @@
 const TypeLong *TypeLong::LONG; // 64-bit integers
 const TypeLong *TypeLong::INT;  // 32-bit subrange
 const TypeLong *TypeLong::UINT; // 32-bit unsigned subrange
+const TypeLong *TypeLong::TYPE_DOMAIN; // alias for TypeLong::LONG

 //------------------------------TypeLong---------------------------------------
 TypeLong::TypeLong( jlong lo, jlong hi, int w ) : Type(Long), _lo(lo), _hi(hi), _widen(w) {
@@ -2459,7 +2463,7 @@
 const TypeOopPtr *TypeOopPtr::BOTTOM;

 //------------------------------TypeOopPtr-------------------------------------
-TypeOopPtr::TypeOopPtr(TYPES t, PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative)
+TypeOopPtr::TypeOopPtr(TYPES t, PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth)
   : TypePtr(t, ptr, offset),
     _const_oop(o), _klass(k),
     _klass_is_exact(xk),
@@ -2467,7 +2471,8 @@
     _is_ptr_to_narrowklass(false),
     _is_ptr_to_boxed_value(false),
     _instance_id(instance_id),
-    _speculative(speculative) {
+    _speculative(speculative),
+    _inline_depth(inline_depth){
   if (Compile::current()->eliminate_boxing() && (t == InstPtr) &&
       (offset > 0) && xk && (k != 0) && k->is_instance_klass()) {
     _is_ptr_to_boxed_value = k->as_instance_klass()->is_boxed_value_offset(offset);
@@ -2534,12 +2539,12 @@

 //------------------------------make-------------------------------------------
 const TypeOopPtr *TypeOopPtr::make(PTR ptr,
-                                   int offset, int instance_id, const TypeOopPtr* speculative) {
+                                   int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth) {
   assert(ptr != Constant, "no constant generic pointers");
   ciKlass*  k = Compile::current()->env()->Object_klass();
   bool      xk = false;
   ciObject* o = NULL;
-  return (TypeOopPtr*)(new TypeOopPtr(OopPtr, ptr, k, xk, o, offset, instance_id, speculative))->hashcons();
+  return (TypeOopPtr*)(new TypeOopPtr(OopPtr, ptr, k, xk, o, offset, instance_id, speculative, inline_depth))->hashcons();
 }


@@ -2547,7 +2552,7 @@
 const Type *TypeOopPtr::cast_to_ptr_type(PTR ptr) const {
   assert(_base == OopPtr, "subclass must override cast_to_ptr_type");
   if( ptr == _ptr ) return this;
-  return make(ptr, _offset, _instance_id, _speculative);
+  return make(ptr, _offset, _instance_id, _speculative, _inline_depth);
 }

 //-----------------------------cast_to_instance_id----------------------------
@@ -2644,7 +2649,7 @@
     case AnyNull: {
       int instance_id = meet_instance_id(InstanceTop);
       const TypeOopPtr* speculative = _speculative;
-      return make(ptr, offset, instance_id, speculative);
+      return make(ptr, offset, instance_id, speculative, _inline_depth);
     }
     case BotPTR:
     case NotNull:
@@ -2657,7 +2662,8 @@
     const TypeOopPtr *tp = t->is_oopptr();
     int instance_id = meet_instance_id(tp->instance_id());
     const TypeOopPtr* speculative = xmeet_speculative(tp);
-    return make(meet_ptr(tp->ptr()), meet_offset(tp->offset()), instance_id, speculative);
+    int depth = meet_inline_depth(tp->inline_depth());
+    return make(meet_ptr(tp->ptr()), meet_offset(tp->offset()), instance_id, speculative, depth);
   }

   case InstPtr:                  // For these, flip the call around to cut down
@@ -2674,7 +2680,7 @@
 const Type *TypeOopPtr::xdual() const {
   assert(klass() == Compile::current()->env()->Object_klass(), "no klasses here");
   assert(const_oop() == NULL,             "no constants here");
-  return new TypeOopPtr(_base, dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance_id(), dual_speculative());
+  return new TypeOopPtr(_base, dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance_id(), dual_speculative(), dual_inline_depth());
 }

 //--------------------------make_from_klass_common-----------------------------
@@ -2765,7 +2771,7 @@
     } else if (!o->should_be_constant()) {
       return TypeAryPtr::make(TypePtr::NotNull, arr0, klass, true, 0);
     }
-    const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0, InstanceBot, NULL, is_autobox_cache);
+    const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0, InstanceBot, NULL, InlineDepthBottom, is_autobox_cache);
     return arr;
   } else if (klass->is_type_array_klass()) {
     // Element is an typeArray
@@ -2854,7 +2860,8 @@
   const TypeOopPtr *a = (const TypeOopPtr*)t;
   if (_klass_is_exact != a->_klass_is_exact ||
       _instance_id != a->_instance_id ||
-      !eq_speculative(a))  return false;
+      !eq_speculative(a) ||
+      _inline_depth != a->_inline_depth)  return false;
   ciObject* one = const_oop();
   ciObject* two = a->const_oop();
   if (one == NULL || two == NULL) {
@@ -2872,6 +2879,7 @@
     _klass_is_exact +
     _instance_id +
     hash_speculative() +
+    _inline_depth +
     TypePtr::hash();
 }

@@ -2892,6 +2900,7 @@
   else if (_instance_id != InstanceBot)
     st->print(",iid=%d",_instance_id);

+  dump_inline_depth(st);
   dump_speculative(st);
 }

@@ -2905,6 +2914,16 @@
     st->print(")");
   }
 }
+
+void TypeOopPtr::dump_inline_depth(outputStream *st) const {
+  if (_inline_depth != InlineDepthBottom) {
+    if (_inline_depth == InlineDepthTop) {
+      st->print(" (inline_depth=InlineDepthTop)");
+    } else {
+      st->print(" (inline_depth=%d)", _inline_depth);
+    }
+  }
+}
 #endif

 //------------------------------singleton--------------------------------------
@@ -2918,7 +2937,7 @@

 //------------------------------add_offset-------------------------------------
 const TypePtr *TypeOopPtr::add_offset(intptr_t offset) const {
-  return make(_ptr, xadd_offset(offset), _instance_id, add_offset_speculative(offset));
+  return make(_ptr, xadd_offset(offset), _instance_id, add_offset_speculative(offset), _inline_depth);
 }

 /**
@@ -2928,7 +2947,52 @@
   if (_speculative == NULL) {
     return this;
   }
-  return make(_ptr, _offset, _instance_id, NULL);
+  assert(_inline_depth == InlineDepthTop || _inline_depth == InlineDepthBottom, "non speculative type shouldn't have inline depth");
+  return make(_ptr, _offset, _instance_id, NULL, _inline_depth);
+}
+
+/**
+ * Return same type but with a different inline depth (used for speculation)
+ *
+ * @param depth  depth to meet with
+ */
+const TypeOopPtr* TypeOopPtr::with_inline_depth(int depth) const {
+  if (!UseInlineDepthForSpeculativeTypes) {
+    return this;
+  }
+  return make(_ptr, _offset, _instance_id, _speculative, depth);
+}
+
+/**
+ * Check whether new profiling would improve speculative type
+ *
+ * @param   exact_kls    class from profiling
+ * @param   inline_depth inlining depth of profile point
+ *
+ * @return  true if type profile is valuable
+ */
+bool TypeOopPtr::would_improve_type(ciKlass* exact_kls, int inline_depth) const {
+  // no way to improve an already exact type
+  if (klass_is_exact()) {
+    return false;
+  }
+  // no profiling?
+  if (exact_kls == NULL) {
+    return false;
+  }
+  // no speculative type or non exact speculative type?
+  if (speculative_type() == NULL) {
+    return true;
+  }
+  // If the node already has an exact speculative type keep it,
+  // unless it was provided by profiling that is at a deeper
+  // inlining level. Profiling at a higher inlining depth is
+  // expected to be less accurate.
+  if (_speculative->inline_depth() == InlineDepthBottom) {
+    return false;
+  }
+  assert(_speculative->inline_depth() != InlineDepthTop, "can't do the comparison");
+  return inline_depth < _speculative->inline_depth();
 }

 //------------------------------meet_instance_id--------------------------------
@@ -3031,6 +3095,21 @@
   return _speculative->hash();
 }

+/**
+ * dual of the inline depth for this type (used for speculation)
+ */
+int TypeOopPtr::dual_inline_depth() const {
+  return -inline_depth();
+}
+
+/**
+ * meet of 2 inline depth (used for speculation)
+ *
+ * @param depth  depth to meet with
+ */
+int TypeOopPtr::meet_inline_depth(int depth) const {
+  return MAX2(inline_depth(), depth);
+}

 //=============================================================================
 // Convenience common pre-built types.
@@ -3041,8 +3120,8 @@
 const TypeInstPtr *TypeInstPtr::KLASS;

 //------------------------------TypeInstPtr-------------------------------------
-TypeInstPtr::TypeInstPtr(PTR ptr, ciKlass* k, bool xk, ciObject* o, int off, int instance_id, const TypeOopPtr* speculative)
-  : TypeOopPtr(InstPtr, ptr, k, xk, o, off, instance_id, speculative), _name(k->name()) {
+TypeInstPtr::TypeInstPtr(PTR ptr, ciKlass* k, bool xk, ciObject* o, int off, int instance_id, const TypeOopPtr* speculative, int inline_depth)
+  : TypeOopPtr(InstPtr, ptr, k, xk, o, off, instance_id, speculative, inline_depth), _name(k->name()) {
    assert(k != NULL &&
           (k->is_loaded() || o == NULL),
           "cannot have constants with non-loaded klass");
@@ -3055,7 +3134,8 @@
                                      ciObject* o,
                                      int offset,
                                      int instance_id,
-                                     const TypeOopPtr* speculative) {
+                                     const TypeOopPtr* speculative,
+                                     int inline_depth) {
   assert( !k->is_loaded() || k->is_instance_klass(), "Must be for instance");
   // Either const_oop() is NULL or else ptr is Constant
   assert( (!o && ptr != Constant) || (o && ptr == Constant),
@@ -3076,7 +3156,7 @@

   // Now hash this baby
   TypeInstPtr *result =
-    (TypeInstPtr*)(new TypeInstPtr(ptr, k, xk, o ,offset, instance_id, speculative))->hashcons();
+    (TypeInstPtr*)(new TypeInstPtr(ptr, k, xk, o ,offset, instance_id, speculative, inline_depth))->hashcons();

   return result;
 }
@@ -3109,7 +3189,7 @@
   if( ptr == _ptr ) return this;
   // Reconstruct _sig info here since not a problem with later lazy
   // construction, _sig will show up on demand.
-  return make(ptr, klass(), klass_is_exact(), const_oop(), _offset, _instance_id, _speculative);
+  return make(ptr, klass(), klass_is_exact(), const_oop(), _offset, _instance_id, _speculative, _inline_depth);
 }


@@ -3121,13 +3201,13 @@
   ciInstanceKlass* ik = _klass->as_instance_klass();
   if( (ik->is_final() || _const_oop) )  return this;  // cannot clear xk
   if( ik->is_interface() )              return this;  // cannot set xk
-  return make(ptr(), klass(), klass_is_exact, const_oop(), _offset, _instance_id, _speculative);
+  return make(ptr(), klass(), klass_is_exact, const_oop(), _offset, _instance_id, _speculative, _inline_depth);
 }

 //-----------------------------cast_to_instance_id----------------------------
 const TypeOopPtr *TypeInstPtr::cast_to_instance_id(int instance_id) const {
   if( instance_id == _instance_id ) return this;
-  return make(_ptr, klass(), _klass_is_exact, const_oop(), _offset, instance_id, _speculative);
+  return make(_ptr, klass(), _klass_is_exact, const_oop(), _offset, instance_id, _speculative, _inline_depth);
 }

 //------------------------------xmeet_unloaded---------------------------------
@@ -3138,6 +3218,7 @@
     PTR ptr = meet_ptr(tinst->ptr());
     int instance_id = meet_instance_id(tinst->instance_id());
     const TypeOopPtr* speculative = xmeet_speculative(tinst);
+    int depth = meet_inline_depth(tinst->inline_depth());

     const TypeInstPtr *loaded    = is_loaded() ? this  : tinst;
     const TypeInstPtr *unloaded  = is_loaded() ? tinst : this;
@@ -3158,7 +3239,7 @@
       assert(loaded->ptr() != TypePtr::Null, "insanity check");
       //
       if(      loaded->ptr() == TypePtr::TopPTR ) { return unloaded; }
-      else if (loaded->ptr() == TypePtr::AnyNull) { return TypeInstPtr::make(ptr, unloaded->klass(), false, NULL, off, instance_id, speculative); }
+      else if (loaded->ptr() == TypePtr::AnyNull) { return TypeInstPtr::make(ptr, unloaded->klass(), false, NULL, off, instance_id, speculative, depth); }
       else if (loaded->ptr() == TypePtr::BotPTR ) { return TypeInstPtr::BOTTOM; }
       else if (loaded->ptr() == TypePtr::Constant || loaded->ptr() == TypePtr::NotNull) {
         if (unloaded->ptr() == TypePtr::BotPTR  ) { return TypeInstPtr::BOTTOM;  }
@@ -3215,6 +3296,7 @@
     PTR ptr = meet_ptr(tp->ptr());
     int instance_id = meet_instance_id(tp->instance_id());
     const TypeOopPtr* speculative = xmeet_speculative(tp);
+    int depth = meet_inline_depth(tp->inline_depth());
     switch (ptr) {
     case TopPTR:
     case AnyNull:                // Fall 'down' to dual of object klass
@@ -3222,12 +3304,12 @@
       // below the centerline when the superclass is exact. We need to
       // do the same here.
       if (klass()->equals(ciEnv::current()->Object_klass()) && !klass_is_exact()) {
-        return TypeAryPtr::make(ptr, tp->ary(), tp->klass(), tp->klass_is_exact(), offset, instance_id, speculative);
+        return TypeAryPtr::make(ptr, tp->ary(), tp->klass(), tp->klass_is_exact(), offset, instance_id, speculative, depth);
       } else {
         // cannot subclass, so the meet has to fall badly below the centerline
         ptr = NotNull;
         instance_id = InstanceBot;
-        return TypeInstPtr::make( ptr, ciEnv::current()->Object_klass(), false, NULL, offset, instance_id, speculative);
+        return TypeInstPtr::make( ptr, ciEnv::current()->Object_klass(), false, NULL, offset, instance_id, speculative, depth);
       }
     case Constant:
     case NotNull:
@@ -3242,7 +3324,7 @@
         if (klass()->equals(ciEnv::current()->Object_klass()) && !klass_is_exact()) {
           // that is, tp's array type is a subtype of my klass
           return TypeAryPtr::make(ptr, (ptr == Constant ? tp->const_oop() : NULL),
-                                  tp->ary(), tp->klass(), tp->klass_is_exact(), offset, instance_id, speculative);
+                                  tp->ary(), tp->klass(), tp->klass_is_exact(), offset, instance_id, speculative, depth);
         }
       }
       // The other case cannot happen, since I cannot be a subtype of an array.
@@ -3250,7 +3332,7 @@
       if( ptr == Constant )
          ptr = NotNull;
       instance_id = InstanceBot;
-      return make(ptr, ciEnv::current()->Object_klass(), false, NULL, offset, instance_id, speculative);
+      return make(ptr, ciEnv::current()->Object_klass(), false, NULL, offset, instance_id, speculative, depth);
     default: typerr(t);
     }
   }
@@ -3265,14 +3347,16 @@
     case AnyNull: {
       int instance_id = meet_instance_id(InstanceTop);
       const TypeOopPtr* speculative = xmeet_speculative(tp);
+      int depth = meet_inline_depth(tp->inline_depth());
       return make(ptr, klass(), klass_is_exact(),
-                  (ptr == Constant ? const_oop() : NULL), offset, instance_id, speculative);
+                  (ptr == Constant ? const_oop() : NULL), offset, instance_id, speculative, depth);
     }
     case NotNull:
     case BotPTR: {
       int instance_id = meet_instance_id(tp->instance_id());
       const TypeOopPtr* speculative = xmeet_speculative(tp);
-      return TypeOopPtr::make(ptr, offset, instance_id, speculative);
+      int depth = meet_inline_depth(tp->inline_depth());
+      return TypeOopPtr::make(ptr, offset, instance_id, speculative, depth);
     }
     default: typerr(t);
     }
@@ -3292,7 +3376,7 @@
       int instance_id = meet_instance_id(InstanceTop);
       const TypeOopPtr* speculative = _speculative;
       return make(ptr, klass(), klass_is_exact(),
-                  (ptr == Constant ? const_oop() : NULL), offset, instance_id, speculative);
+                  (ptr == Constant ? const_oop() : NULL), offset, instance_id, speculative, _inline_depth);
     }
     case NotNull:
     case BotPTR:
@@ -3324,13 +3408,14 @@
     PTR ptr = meet_ptr( tinst->ptr() );
     int instance_id = meet_instance_id(tinst->instance_id());
     const TypeOopPtr* speculative = xmeet_speculative(tinst);
+    int depth = meet_inline_depth(tinst->inline_depth());

     // Check for easy case; klasses are equal (and perhaps not loaded!)
     // If we have constants, then we created oops so classes are loaded
     // and we can handle the constants further down.  This case handles
     // both-not-loaded or both-loaded classes
     if (ptr != Constant && klass()->equals(tinst->klass()) && klass_is_exact() == tinst->klass_is_exact()) {
-      return make(ptr, klass(), klass_is_exact(), NULL, off, instance_id, speculative);
+      return make(ptr, klass(), klass_is_exact(), NULL, off, instance_id, speculative, depth);
     }

     // Classes require inspection in the Java klass hierarchy.  Must be loaded.
@@ -3394,7 +3479,7 @@
         // Find out which constant.
         o = (this_klass == klass()) ? const_oop() : tinst->const_oop();
       }
-      return make(ptr, k, xk, o, off, instance_id, speculative);
+      return make(ptr, k, xk, o, off, instance_id, speculative, depth);
     }

     // Either oop vs oop or interface vs interface or interface vs Object
@@ -3471,7 +3556,7 @@
         else
           ptr = NotNull;
       }
-      return make(ptr, this_klass, this_xk, o, off, instance_id, speculative);
+      return make(ptr, this_klass, this_xk, o, off, instance_id, speculative, depth);
     } // Else classes are not equal

     // Since klasses are different, we require a LCA in the Java
@@ -3482,7 +3567,7 @@

     // Now we find the LCA of Java classes
     ciKlass* k = this_klass->least_common_ancestor(tinst_klass);
-    return make(ptr, k, false, NULL, off, instance_id, speculative);
+    return make(ptr, k, false, NULL, off, instance_id, speculative, depth);
   } // End of case InstPtr

   } // End of switch
@@ -3506,7 +3591,7 @@
 // Dual: do NOT dual on klasses.  This means I do NOT understand the Java
 // inheritance mechanism.
 const Type *TypeInstPtr::xdual() const {
-  return new TypeInstPtr(dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance_id(), dual_speculative());
+  return new TypeInstPtr(dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance_id(), dual_speculative(), dual_inline_depth());
 }

 //------------------------------eq---------------------------------------------
@@ -3563,6 +3648,7 @@
   else if (_instance_id != InstanceBot)
     st->print(",iid=%d",_instance_id);

+  dump_inline_depth(st);
   dump_speculative(st);
 }
 #endif
@@ -3576,7 +3662,15 @@
   if (_speculative == NULL) {
     return this;
   }
-  return make(_ptr, klass(), klass_is_exact(), const_oop(), _offset, _instance_id, NULL);
+  assert(_inline_depth == InlineDepthTop || _inline_depth == InlineDepthBottom, "non speculative type shouldn't have inline depth");
+  return make(_ptr, klass(), klass_is_exact(), const_oop(), _offset, _instance_id, NULL, _inline_depth);
+}
+
+const TypeOopPtr *TypeInstPtr::with_inline_depth(int depth) const {
+  if (!UseInlineDepthForSpeculativeTypes) {
+    return this;
+  }
+  return make(_ptr, klass(), klass_is_exact(), const_oop(), _offset, _instance_id, _speculative, depth);
 }

 //=============================================================================
@@ -3593,30 +3687,30 @@
 const TypeAryPtr *TypeAryPtr::DOUBLES;

 //------------------------------make-------------------------------------------
-const TypeAryPtr *TypeAryPtr::make(PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id, const TypeOopPtr* speculative) {
+const TypeAryPtr *TypeAryPtr::make(PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth) {
   assert(!(k == NULL && ary->_elem->isa_int()),
          "integral arrays must be pre-equipped with a class");
   if (!xk)  xk = ary->ary_must_be_exact();
   assert(instance_id <= 0 || xk || !UseExactTypes, "instances are always exactly typed");
   if (!UseExactTypes)  xk = (ptr == Constant);
-  return (TypeAryPtr*)(new TypeAryPtr(ptr, NULL, ary, k, xk, offset, instance_id, false, speculative))->hashcons();
+  return (TypeAryPtr*)(new TypeAryPtr(ptr, NULL, ary, k, xk, offset, instance_id, false, speculative, inline_depth))->hashcons();
 }

 //------------------------------make-------------------------------------------
-const TypeAryPtr *TypeAryPtr::make(PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id, const TypeOopPtr* speculative, bool is_autobox_cache) {
+const TypeAryPtr *TypeAryPtr::make(PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth, bool is_autobox_cache) {
   assert(!(k == NULL && ary->_elem->isa_int()),
          "integral arrays must be pre-equipped with a class");
   assert( (ptr==Constant && o) || (ptr!=Constant && !o), "" );
   if (!xk)  xk = (o != NULL) || ary->ary_must_be_exact();
   assert(instance_id <= 0 || xk || !UseExactTypes, "instances are always exactly typed");
   if (!UseExactTypes)  xk = (ptr == Constant);
-  return (TypeAryPtr*)(new TypeAryPtr(ptr, o, ary, k, xk, offset, instance_id, is_autobox_cache, speculative))->hashcons();
+  return (TypeAryPtr*)(new TypeAryPtr(ptr, o, ary, k, xk, offset, instance_id, is_autobox_cache, speculative, inline_depth))->hashcons();
 }

 //------------------------------cast_to_ptr_type-------------------------------
 const Type *TypeAryPtr::cast_to_ptr_type(PTR ptr) const {
   if( ptr == _ptr ) return this;
-  return make(ptr, const_oop(), _ary, klass(), klass_is_exact(), _offset, _instance_id, _speculative);
+  return make(ptr, const_oop(), _ary, klass(), klass_is_exact(), _offset, _instance_id, _speculative, _inline_depth);
 }


@@ -3625,13 +3719,13 @@
   if( klass_is_exact == _klass_is_exact ) return this;
   if (!UseExactTypes)  return this;
   if (_ary->ary_must_be_exact())  return this;  // cannot clear xk
-  return make(ptr(), const_oop(), _ary, klass(), klass_is_exact, _offset, _instance_id, _speculative);
+  return make(ptr(), const_oop(), _ary, klass(), klass_is_exact, _offset, _instance_id, _speculative, _inline_depth);
 }

 //-----------------------------cast_to_instance_id----------------------------
 const TypeOopPtr *TypeAryPtr::cast_to_instance_id(int instance_id) const {
   if( instance_id == _instance_id ) return this;
-  return make(_ptr, const_oop(), _ary, klass(), _klass_is_exact, _offset, instance_id, _speculative);
+  return make(_ptr, const_oop(), _ary, klass(), _klass_is_exact, _offset, instance_id, _speculative, _inline_depth);
 }

 //-----------------------------narrow_size_type-------------------------------
@@ -3694,7 +3788,7 @@
   new_size = narrow_size_type(new_size);
   if (new_size == size())  return this;
   const TypeAry* new_ary = TypeAry::make(elem(), new_size, is_stable());
-  return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id, _speculative);
+  return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id, _speculative, _inline_depth);
 }


@@ -3773,19 +3867,20 @@
     const TypeOopPtr *tp = t->is_oopptr();
     int offset = meet_offset(tp->offset());
     PTR ptr = meet_ptr(tp->ptr());
+    int depth = meet_inline_depth(tp->inline_depth());
     switch (tp->ptr()) {
     case TopPTR:
     case AnyNull: {
       int instance_id = meet_instance_id(InstanceTop);
       const TypeOopPtr* speculative = xmeet_speculative(tp);
       return make(ptr, (ptr == Constant ? const_oop() : NULL),
-                  _ary, _klass, _klass_is_exact, offset, instance_id, speculative);
+                  _ary, _klass, _klass_is_exact, offset, instance_id, speculative, depth);
     }
     case BotPTR:
     case NotNull: {
       int instance_id = meet_instance_id(tp->instance_id());
       const TypeOopPtr* speculative = xmeet_speculative(tp);
-      return TypeOopPtr::make(ptr, offset, instance_id, speculative);
+      return TypeOopPtr::make(ptr, offset, instance_id, speculative, depth);
     }
     default: ShouldNotReachHere();
     }
@@ -3809,7 +3904,7 @@
       int instance_id = meet_instance_id(InstanceTop);
       const TypeOopPtr* speculative = _speculative;
       return make(ptr, (ptr == Constant ? const_oop() : NULL),
-                  _ary, _klass, _klass_is_exact, offset, instance_id, speculative);
+                  _ary, _klass, _klass_is_exact, offset, instance_id, speculative, _inline_depth);
     }
     default: ShouldNotReachHere();
     }
@@ -3826,6 +3921,7 @@
     PTR ptr = meet_ptr(tap->ptr());
     int instance_id = meet_instance_id(tap->instance_id());
     const TypeOopPtr* speculative = xmeet_speculative(tap);
+    int depth = meet_inline_depth(tap->inline_depth());
     ciKlass* lazy_klass = NULL;
     if (tary->_elem->isa_int()) {
       // Integral array element types have irrelevant lattice relations.
@@ -3866,7 +3962,7 @@
       } else {
         xk = (tap->_klass_is_exact | this->_klass_is_exact);
       }
-      return make(ptr, const_oop(), tary, lazy_klass, xk, off, instance_id, speculative);
+      return make(ptr, const_oop(), tary, lazy_klass, xk, off, instance_id, speculative, depth);
     case Constant: {
       ciObject* o = const_oop();
       if( _ptr == Constant ) {
@@ -3885,7 +3981,7 @@
         // Only precise for identical arrays
         xk = this->_klass_is_exact && (klass() == tap->klass());
       }
-      return TypeAryPtr::make(ptr, o, tary, lazy_klass, xk, off, instance_id, speculative);
+      return TypeAryPtr::make(ptr, o, tary, lazy_klass, xk, off, instance_id, speculative, depth);
     }
     case NotNull:
     case BotPTR:
@@ -3894,7 +3990,7 @@
             xk = tap->_klass_is_exact;
       else  xk = (tap->_klass_is_exact & this->_klass_is_exact) &&
               (klass() == tap->klass()); // Only precise for identical arrays
-      return TypeAryPtr::make(ptr, NULL, tary, lazy_klass, xk, off, instance_id, speculative);
+      return TypeAryPtr::make(ptr, NULL, tary, lazy_klass, xk, off, instance_id, speculative, depth);
     default: ShouldNotReachHere();
     }
   }
@@ -3906,6 +4002,7 @@
     PTR ptr = meet_ptr(tp->ptr());
     int instance_id = meet_instance_id(tp->instance_id());
     const TypeOopPtr* speculative = xmeet_speculative(tp);
+    int depth = meet_inline_depth(tp->inline_depth());
     switch (ptr) {
     case TopPTR:
     case AnyNull:                // Fall 'down' to dual of object klass
@@ -3913,12 +4010,12 @@
       // below the centerline when the superclass is exact. We need to
       // do the same here.
       if (tp->klass()->equals(ciEnv::current()->Object_klass()) && !tp->klass_is_exact()) {
-        return TypeAryPtr::make(ptr, _ary, _klass, _klass_is_exact, offset, instance_id, speculative);
+        return TypeAryPtr::make(ptr, _ary, _klass, _klass_is_exact, offset, instance_id, speculative, depth);
       } else {
         // cannot subclass, so the meet has to fall badly below the centerline
         ptr = NotNull;
         instance_id = InstanceBot;
-        return TypeInstPtr::make(ptr, ciEnv::current()->Object_klass(), false, NULL,offset, instance_id, speculative);
+        return TypeInstPtr::make(ptr, ciEnv::current()->Object_klass(), false, NULL,offset, instance_id, speculative, depth);
       }
     case Constant:
     case NotNull:
@@ -3933,7 +4030,7 @@
         if (tp->klass()->equals(ciEnv::current()->Object_klass()) && !tp->klass_is_exact()) {
           // that is, my array type is a subtype of 'tp' klass
           return make(ptr, (ptr == Constant ? const_oop() : NULL),
-                      _ary, _klass, _klass_is_exact, offset, instance_id, speculative);
+                      _ary, _klass, _klass_is_exact, offset, instance_id, speculative, depth);
         }
       }
       // The other case cannot happen, since t cannot be a subtype of an array.
@@ -3941,7 +4038,7 @@
       if( ptr == Constant )
          ptr = NotNull;
       instance_id = InstanceBot;
-      return TypeInstPtr::make(ptr, ciEnv::current()->Object_klass(), false, NULL,offset, instance_id, speculative);
+      return TypeInstPtr::make(ptr, ciEnv::current()->Object_klass(), false, NULL,offset, instance_id, speculative, depth);
     default: typerr(t);
     }
   }
@@ -3952,7 +4049,7 @@
 //------------------------------xdual------------------------------------------
 // Dual: compute field-by-field dual
 const Type *TypeAryPtr::xdual() const {
-  return new TypeAryPtr(dual_ptr(), _const_oop, _ary->dual()->is_ary(),_klass, _klass_is_exact, dual_offset(), dual_instance_id(), is_autobox_cache(), dual_speculative());
+  return new TypeAryPtr(dual_ptr(), _const_oop, _ary->dual()->is_ary(),_klass, _klass_is_exact, dual_offset(), dual_instance_id(), is_autobox_cache(), dual_speculative(), dual_inline_depth());
 }

 //----------------------interface_vs_oop---------------------------------------
@@ -4005,6 +4102,7 @@
   else if (_instance_id != InstanceBot)
     st->print(",iid=%d",_instance_id);

+  dump_inline_depth(st);
   dump_speculative(st);
 }
 #endif
@@ -4016,11 +4114,22 @@

 //------------------------------add_offset-------------------------------------
 const TypePtr *TypeAryPtr::add_offset(intptr_t offset) const {
-  return make(_ptr, _const_oop, _ary, _klass, _klass_is_exact, xadd_offset(offset), _instance_id, add_offset_speculative(offset));
+  return make(_ptr, _const_oop, _ary, _klass, _klass_is_exact, xadd_offset(offset), _instance_id, add_offset_speculative(offset), _inline_depth);
 }

 const Type *TypeAryPtr::remove_speculative() const {
-  return make(_ptr, _const_oop, _ary->remove_speculative()->is_ary(), _klass, _klass_is_exact, _offset, _instance_id, NULL);
+  if (_speculative == NULL) {
+    return this;
+  }
+  assert(_inline_depth == InlineDepthTop || _inline_depth == InlineDepthBottom, "non speculative type shouldn't have inline depth");
+  return make(_ptr, _const_oop, _ary->remove_speculative()->is_ary(), _klass, _klass_is_exact, _offset, _instance_id, NULL, _inline_depth);
+}
+
+const TypeOopPtr *TypeAryPtr::with_inline_depth(int depth) const {
+  if (!UseInlineDepthForSpeculativeTypes) {
+    return this;
+  }
+  return make(_ptr, _const_oop, _ary->remove_speculative()->is_ary(), _klass, _klass_is_exact, _offset, _instance_id, _speculative, depth);
 }

 //=============================================================================
@@ -4271,7 +4380,7 @@
       // else fall through:
     case TopPTR:
     case AnyNull: {
-      return make(ptr, NULL, offset);
+      return make(ptr, _metadata, offset);
     }
     case BotPTR:
     case NotNull:
--- a/src/share/vm/opto/type.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/opto/type.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -415,10 +415,15 @@
                                         bool is_autobox_cache = false);

   // Speculative type. See TypeInstPtr
+  virtual const TypeOopPtr* speculative() const { return NULL; }
   virtual ciKlass* speculative_type() const { return NULL; }
   const Type* maybe_remove_speculative(bool include_speculative) const;
   virtual const Type* remove_speculative() const { return this; }

+  virtual bool would_improve_type(ciKlass* exact_kls, int inline_depth) const {
+    return exact_kls != NULL;
+  }
+
 private:
   // support arrays
   static const BasicType _basic_type[];
@@ -489,6 +494,7 @@
   virtual const Type *filter_helper(const Type *kills, bool include_speculative) const;

 public:
+  typedef jint NativeType;
   virtual bool eq( const Type *t ) const;
   virtual int  hash() const;             // Type specific hashing
   virtual bool singleton(void) const;    // TRUE if type is a singleton
@@ -531,6 +537,9 @@
   static const TypeInt *POS1;
   static const TypeInt *INT;
   static const TypeInt *SYMINT; // symmetric range [-max_jint..max_jint]
+  static const TypeInt *TYPE_DOMAIN; // alias for TypeInt::INT
+
+  static const TypeInt *as_self(const Type *t) { return t->is_int(); }
 #ifndef PRODUCT
   virtual void dump2( Dict &d, uint depth, outputStream *st ) const;
 #endif
@@ -546,6 +555,7 @@
   // Do not kill _widen bits.
   virtual const Type *filter_helper(const Type *kills, bool include_speculative) const;
 public:
+  typedef jlong NativeType;
   virtual bool eq( const Type *t ) const;
   virtual int  hash() const;             // Type specific hashing
   virtual bool singleton(void) const;    // TRUE if type is a singleton
@@ -568,6 +578,7 @@

   virtual bool        is_finite() const;  // Has a finite value

+
   virtual const Type *xmeet( const Type *t ) const;
   virtual const Type *xdual() const;    // Compute dual right now.
   virtual const Type *widen( const Type *t, const Type* limit_type ) const;
@@ -580,6 +591,11 @@
   static const TypeLong *LONG;
   static const TypeLong *INT;    // 32-bit subrange [min_jint..max_jint]
   static const TypeLong *UINT;   // 32-bit unsigned [0..max_juint]
+  static const TypeLong *TYPE_DOMAIN; // alias for TypeLong::LONG
+
+  // static convenience methods.
+  static const TypeLong *as_self(const Type *t) { return t->is_long(); }
+
 #ifndef PRODUCT
   virtual void dump2( Dict &d, uint, outputStream *st  ) const;// Specialized per-Type dumping
 #endif
@@ -834,7 +850,7 @@
 // Some kind of oop (Java pointer), either klass or instance or array.
 class TypeOopPtr : public TypePtr {
 protected:
-  TypeOopPtr(TYPES t, PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative);
+  TypeOopPtr(TYPES t, PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth);
 public:
   virtual bool eq( const Type *t ) const;
   virtual int  hash() const;             // Type specific hashing
@@ -845,6 +861,10 @@
   };
 protected:

+  enum {
+    InlineDepthBottom = INT_MAX,
+    InlineDepthTop = -InlineDepthBottom
+  };
   // Oop is NULL, unless this is a constant oop.
   ciObject*     _const_oop;   // Constant oop
   // If _klass is NULL, then so is _sig.  This is an unloaded klass.
@@ -865,6 +885,11 @@
   // use it, then we have to emit a guard: this part of the type is
   // not something we know but something we speculate about the type.
   const TypeOopPtr*   _speculative;
+  // For speculative types, we record at what inlining depth the
+  // profiling point that provided the data is. We want to favor
+  // profile data coming from outer scopes which are likely better for
+  // the current compilation.
+  int _inline_depth;

   static const TypeOopPtr* make_from_klass_common(ciKlass* klass, bool klass_change, bool try_for_exact);

@@ -880,6 +905,12 @@
 #ifndef PRODUCT
   void dump_speculative(outputStream *st) const;
 #endif
+  // utility methods to work on the inline depth of the type
+  int dual_inline_depth() const;
+  int meet_inline_depth(int depth) const;
+#ifndef PRODUCT
+  void dump_inline_depth(outputStream *st) const;
+#endif

   // Do not allow interface-vs.-noninterface joins to collapse to top.
   virtual const Type *filter_helper(const Type *kills, bool include_speculative) const;
@@ -910,7 +941,7 @@
                                               bool not_null_elements = false);

   // Make a generic (unclassed) pointer to an oop.
-  static const TypeOopPtr* make(PTR ptr, int offset, int instance_id, const TypeOopPtr* speculative);
+  static const TypeOopPtr* make(PTR ptr, int offset, int instance_id, const TypeOopPtr* speculative = NULL, int inline_depth = InlineDepthBottom);

   ciObject* const_oop()    const { return _const_oop; }
   virtual ciKlass* klass() const { return _klass;     }
@@ -924,7 +955,7 @@
   bool is_known_instance()       const { return _instance_id > 0; }
   int  instance_id()             const { return _instance_id; }
   bool is_known_instance_field() const { return is_known_instance() && _offset >= 0; }
-  const TypeOopPtr* speculative() const { return _speculative; }
+  virtual const TypeOopPtr* speculative() const { return _speculative; }

   virtual intptr_t get_con() const;

@@ -957,18 +988,23 @@
     if (_speculative != NULL) {
       const TypeOopPtr* speculative = _speculative->join(this)->is_oopptr();
       if (speculative->klass_is_exact()) {
-       return speculative->klass();
+        return speculative->klass();
       }
     }
     return NULL;
   }
+  int inline_depth() const {
+    return _inline_depth;
+  }
+  virtual const TypeOopPtr* with_inline_depth(int depth) const;
+  virtual bool would_improve_type(ciKlass* exact_kls, int inline_depth) const;
 };

 //------------------------------TypeInstPtr------------------------------------
 // Class of Java object pointers, pointing either to non-array Java instances
 // or to a Klass* (including array klasses).
 class TypeInstPtr : public TypeOopPtr {
-  TypeInstPtr(PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative);
+  TypeInstPtr(PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth);
   virtual bool eq( const Type *t ) const;
   virtual int  hash() const;             // Type specific hashing

@@ -1004,7 +1040,7 @@
   }

   // Make a pointer to an oop.
-  static const TypeInstPtr *make(PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL);
+  static const TypeInstPtr *make(PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL, int inline_depth = InlineDepthBottom);

   /** Create constant type for a constant boxed value */
   const Type* get_const_boxed_value() const;
@@ -1023,6 +1059,7 @@
   virtual const TypePtr *add_offset( intptr_t offset ) const;
   // Return same type without a speculative part
   virtual const Type* remove_speculative() const;
+  virtual const TypeOopPtr* with_inline_depth(int depth) const;

   // the core of the computation of the meet of 2 types
   virtual const Type *xmeet_helper(const Type *t) const;
@@ -1044,8 +1081,8 @@
 // Class of Java array pointers
 class TypeAryPtr : public TypeOopPtr {
   TypeAryPtr( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk,
-              int offset, int instance_id, bool is_autobox_cache, const TypeOopPtr* speculative)
-    : TypeOopPtr(AryPtr,ptr,k,xk,o,offset, instance_id, speculative),
+              int offset, int instance_id, bool is_autobox_cache, const TypeOopPtr* speculative, int inline_depth)
+    : TypeOopPtr(AryPtr,ptr,k,xk,o,offset, instance_id, speculative, inline_depth),
     _ary(ary),
     _is_autobox_cache(is_autobox_cache)
  {
@@ -1083,9 +1120,9 @@

   bool is_autobox_cache() const { return _is_autobox_cache; }

-  static const TypeAryPtr *make( PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL);
+  static const TypeAryPtr *make( PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL, int inline_depth = InlineDepthBottom);
   // Constant pointer to array
-  static const TypeAryPtr *make( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL, bool is_autobox_cache = false);
+  static const TypeAryPtr *make( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL, int inline_depth = InlineDepthBottom, bool is_autobox_cache= false);

   // Return a 'ptr' version of this type
   virtual const Type *cast_to_ptr_type(PTR ptr) const;
@@ -1101,6 +1138,7 @@
   virtual const TypePtr *add_offset( intptr_t offset ) const;
   // Return same type without a speculative part
   virtual const Type* remove_speculative() const;
+  virtual const TypeOopPtr* with_inline_depth(int depth) const;

   // the core of the computation of the meet of 2 types
   virtual const Type *xmeet_helper(const Type *t) const;
@@ -1678,6 +1716,7 @@
 #define ConvL2X(x)   (x)
 #define ConvX2I(x)   ConvL2I(x)
 #define ConvX2L(x)   (x)
+#define ConvX2UL(x)  (x)

 #else

@@ -1722,6 +1761,7 @@
 #define ConvL2X(x)   ConvL2I(x)
 #define ConvX2I(x)   (x)
 #define ConvX2L(x)   ConvI2L(x)
+#define ConvX2UL(x)  ConvI2UL(x)

 #endif
--- a/src/share/vm/prims/jni.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/prims/jni.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -4450,8 +4450,23 @@

     // Get needed field and method IDs
     directByteBufferConstructor = env->GetMethodID(directByteBufferClass, "<init>", "(JI)V");
+    if (env->ExceptionCheck()) {
+      env->ExceptionClear();
+      directBufferSupportInitializeFailed = 1;
+      return false;
+    }
     directBufferAddressField    = env->GetFieldID(bufferClass, "address", "J");
+    if (env->ExceptionCheck()) {
+      env->ExceptionClear();
+      directBufferSupportInitializeFailed = 1;
+      return false;
+    }
     bufferCapacityField         = env->GetFieldID(bufferClass, "capacity", "I");
+    if (env->ExceptionCheck()) {
+      env->ExceptionClear();
+      directBufferSupportInitializeFailed = 1;
+      return false;
+    }

     if ((directByteBufferConstructor == NULL) ||
         (directBufferAddressField    == NULL) ||
@@ -5068,6 +5083,7 @@
 #if INCLUDE_ALL_GCS
 void TestOldFreeSpaceCalculation_test();
 void TestG1BiasedArray_test();
+void TestCodeCacheRemSet_test();
 #endif

 void execute_internal_vm_tests() {
@@ -5093,6 +5109,7 @@
     run_unit_test(TestOldFreeSpaceCalculation_test());
     run_unit_test(TestG1BiasedArray_test());
     run_unit_test(HeapRegionRemSet::test_prt());
+    run_unit_test(TestCodeCacheRemSet_test());
 #endif
     tty->print_cr("All internal VM tests passed");
   }
--- a/src/share/vm/prims/unsafe.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/prims/unsafe.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -861,6 +861,11 @@
   strcpy(buf, "java/lang/");
   strcat(buf, ename);
   jclass cls = env->FindClass(buf);
+  if (env->ExceptionCheck()) {
+    env->ExceptionClear();
+    tty->print_cr("Unsafe: cannot throw %s because FindClass has failed", buf);
+    return;
+  }
   char* msg = NULL;
   env->ThrowNew(cls, msg);
 }
--- a/src/share/vm/prims/whitebox.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/prims/whitebox.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -316,9 +316,10 @@

 WB_ENTRY(jint, WB_DeoptimizeMethod(JNIEnv* env, jobject o, jobject method, jboolean is_osr))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  int result = 0;
+  CHECK_JNI_EXCEPTION_(env, result);
   MutexLockerEx mu(Compile_lock);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
-  int result = 0;
   nmethod* code;
   if (is_osr) {
     int bci = InvocationEntryBci;
@@ -344,6 +345,7 @@

 WB_ENTRY(jboolean, WB_IsMethodCompiled(JNIEnv* env, jobject o, jobject method, jboolean is_osr))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
   MutexLockerEx mu(Compile_lock);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   nmethod* code = is_osr ? mh->lookup_osr_nmethod_for(InvocationEntryBci, CompLevel_none, false) : mh->code();
@@ -355,6 +357,7 @@

 WB_ENTRY(jboolean, WB_IsMethodCompilable(JNIEnv* env, jobject o, jobject method, jint comp_level, jboolean is_osr))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
   MutexLockerEx mu(Compile_lock);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   if (is_osr) {
@@ -366,6 +369,7 @@

 WB_ENTRY(jboolean, WB_IsMethodQueuedForCompilation(JNIEnv* env, jobject o, jobject method))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
   MutexLockerEx mu(Compile_lock);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   return mh->queued_for_compilation();
@@ -373,6 +377,7 @@

 WB_ENTRY(jint, WB_GetMethodCompilationLevel(JNIEnv* env, jobject o, jobject method, jboolean is_osr))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, CompLevel_none);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   nmethod* code = is_osr ? mh->lookup_osr_nmethod_for(InvocationEntryBci, CompLevel_none, false) : mh->code();
   return (code != NULL ? code->comp_level() : CompLevel_none);
@@ -380,6 +385,7 @@

 WB_ENTRY(void, WB_MakeMethodNotCompilable(JNIEnv* env, jobject o, jobject method, jint comp_level, jboolean is_osr))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION(env);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   if (is_osr) {
     mh->set_not_osr_compilable(comp_level, true /* report */, "WhiteBox");
@@ -390,6 +396,7 @@

 WB_ENTRY(jint, WB_GetMethodEntryBci(JNIEnv* env, jobject o, jobject method))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, InvocationEntryBci);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   nmethod* code = mh->lookup_osr_nmethod_for(InvocationEntryBci, CompLevel_none, false);
   return (code != NULL && code->is_osr_method() ? code->osr_entry_bci() : InvocationEntryBci);
@@ -397,6 +404,7 @@

 WB_ENTRY(jboolean, WB_TestSetDontInlineMethod(JNIEnv* env, jobject o, jobject method, jboolean value))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   bool result = mh->dont_inline();
   mh->set_dont_inline(value == JNI_TRUE);
@@ -414,6 +422,7 @@

 WB_ENTRY(jboolean, WB_TestSetForceInlineMethod(JNIEnv* env, jobject o, jobject method, jboolean value))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   bool result = mh->force_inline();
   mh->set_force_inline(value == JNI_TRUE);
@@ -422,6 +431,7 @@

 WB_ENTRY(jboolean, WB_EnqueueMethodForCompilation(JNIEnv* env, jobject o, jobject method, jint comp_level, jint bci))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   nmethod* nm = CompileBroker::compile_method(mh, bci, comp_level, mh, mh->invocation_count(), "WhiteBox", THREAD);
   MutexLockerEx mu(Compile_lock);
@@ -430,6 +440,7 @@

 WB_ENTRY(void, WB_ClearMethodState(JNIEnv* env, jobject o, jobject method))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION(env);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
   MutexLockerEx mu(Compile_lock);
   MethodData* mdo = mh->method_data();
@@ -489,6 +500,16 @@
   c = *p;
 WB_END

+WB_ENTRY(jstring, WB_GetCPUFeatures(JNIEnv* env, jobject o))
+  const char* cpu_features = VM_Version::cpu_features();
+  ThreadToNativeFromVM ttn(thread);
+  jstring features_string = env->NewStringUTF(cpu_features);
+
+  CHECK_JNI_EXCEPTION_(env, NULL);
+
+  return features_string;
+WB_END
+
 //Some convenience methods to deal with objects from java
 int WhiteBox::offset_for_field(const char* field_name, oop object,
     Symbol* signature_symbol) {
@@ -600,6 +621,7 @@
   {CC"isInStringTable",   CC"(Ljava/lang/String;)Z",  (void*)&WB_IsInStringTable  },
   {CC"fullGC",   CC"()V",                             (void*)&WB_FullGC },
   {CC"readReservedMemory", CC"()V",                   (void*)&WB_ReadReservedMemory },
+  {CC"getCPUFeatures",     CC"()Ljava/lang/String;",  (void*)&WB_GetCPUFeatures     },
 };

 #undef CC
@@ -616,14 +638,18 @@
         bool result = true;
         //  one by one registration natives for exception catching
         jclass exceptionKlass = env->FindClass(vmSymbols::java_lang_NoSuchMethodError()->as_C_string());
+        CHECK_JNI_EXCEPTION(env);
         for (int i = 0, n = sizeof(methods) / sizeof(methods[0]); i < n; ++i) {
           if (env->RegisterNatives(wbclass, methods + i, 1) != 0) {
             result = false;
-            if (env->ExceptionCheck() && env->IsInstanceOf(env->ExceptionOccurred(), exceptionKlass)) {
-              // j.l.NoSuchMethodError is thrown when a method can't be found or a method is not native
-              // ignoring the exception
-              tty->print_cr("Warning: 'NoSuchMethodError' on register of sun.hotspot.WhiteBox::%s%s", methods[i].name, methods[i].signature);
+            jthrowable throwable_obj = env->ExceptionOccurred();
+            if (throwable_obj != NULL) {
               env->ExceptionClear();
+              if (env->IsInstanceOf(throwable_obj, exceptionKlass)) {
+                // j.l.NoSuchMethodError is thrown when a method can't be found or a method is not native
+                // ignoring the exception
+                tty->print_cr("Warning: 'NoSuchMethodError' on register of sun.hotspot.WhiteBox::%s%s", methods[i].name, methods[i].signature);
+              }
             } else {
               // register is failed w/o exception or w/ unexpected exception
               tty->print_cr("Warning: unexpected error on register of sun.hotspot.WhiteBox::%s%s. All methods will be unregistered", methods[i].name, methods[i].signature);
--- a/src/share/vm/prims/whitebox.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/prims/whitebox.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -36,6 +36,24 @@
 #define WB_END JNI_END
 #define WB_METHOD_DECLARE(result_type) extern "C" result_type JNICALL

+#define CHECK_JNI_EXCEPTION_(env, value)                               \
+  do {                                                                 \
+    JavaThread* THREAD = JavaThread::thread_from_jni_environment(env); \
+    if (HAS_PENDING_EXCEPTION) {                                       \
+      CLEAR_PENDING_EXCEPTION;                                         \
+      return(value);                                                   \
+    }                                                                  \
+  } while (0)
+
+#define CHECK_JNI_EXCEPTION(env)                                       \
+  do {                                                                 \
+    JavaThread* THREAD = JavaThread::thread_from_jni_environment(env); \
+    if (HAS_PENDING_EXCEPTION) {                                       \
+      CLEAR_PENDING_EXCEPTION;                                         \
+      return;                                                          \
+    }                                                                  \
+  } while (0)
+
 class WhiteBox : public AllStatic {
  private:
   static bool _used;
--- a/src/share/vm/runtime/arguments.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -2220,6 +2220,8 @@
                                        "G1ConcRSHotCardLimit");
     status = status && verify_interval(G1ConcRSLogCacheSize, 0, 31,
                                        "G1ConcRSLogCacheSize");
+    status = status && verify_interval(StringDeduplicationAgeThreshold, 1, markOopDesc::max_age,
+                                       "StringDeduplicationAgeThreshold");
   }
   if (UseConcMarkSweepGC) {
     status = status && verify_min_value(CMSOldPLABNumRefills, 1, "CMSOldPLABNumRefills");
@@ -3749,9 +3751,6 @@
 #endif // CC_INTERP

 #ifdef COMPILER2
-  if (!UseBiasedLocking || EmitSync != 0) {
-    UseOptoBiasInlining = false;
-  }
   if (!EliminateLocks) {
     EliminateNestedLocks = false;
   }
@@ -3812,6 +3811,11 @@
       UseBiasedLocking = false;
     }
   }
+#ifdef COMPILER2
+  if (!UseBiasedLocking || EmitSync != 0) {
+    UseOptoBiasInlining = false;
+  }
+#endif

   // set PauseAtExit if the gamma launcher was used and a debugger is attached
   // but only if not already set on the commandline
--- a/src/share/vm/runtime/deoptimization.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/runtime/deoptimization.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1288,7 +1288,8 @@
     gather_statistics(reason, action, trap_bc);

     // Ensure that we can record deopt. history:
-    bool create_if_missing = ProfileTraps;
+    // Need MDO to record RTM code generation state.
+    bool create_if_missing = ProfileTraps RTM_OPT_ONLY( || UseRTMLocking );

     MethodData* trap_mdo =
       get_method_data(thread, trap_method, create_if_missing);
@@ -1489,6 +1490,7 @@
       bool maybe_prior_trap = false;
       bool maybe_prior_recompile = false;
       pdata = query_update_method_data(trap_mdo, trap_bci, reason,
+                                   nm->method(),
                                    //outputs:
                                    this_trap_count,
                                    maybe_prior_trap,
@@ -1534,7 +1536,7 @@
       }

       // Go back to the compiler if there are too many traps in this method.
-      if (this_trap_count >= (uint)PerMethodTrapLimit) {
+      if (this_trap_count >= per_method_trap_limit(reason)) {
         // If there are too many traps in this method, force a recompile.
         // This will allow the compiler to see the limit overflow, and
         // take corrective action, if possible.
@@ -1568,6 +1570,17 @@
         if (tstate1 != tstate0)
           pdata->set_trap_state(tstate1);
       }
+
+#if INCLUDE_RTM_OPT
+      // Restart collecting RTM locking abort statistic if the method
+      // is recompiled for a reason other than RTM state change.
+      // Assume that in new recompiled code the statistic could be different,
+      // for example, due to different inlining.
+      if ((reason != Reason_rtm_state_change) && (trap_mdo != NULL) &&
+          UseRTMDeopt && (nm->rtm_state() != ProfileRTM)) {
+        trap_mdo->atomic_set_rtm_state(ProfileRTM);
+      }
+#endif
     }

     if (inc_recompile_count) {
@@ -1622,6 +1635,7 @@
 Deoptimization::query_update_method_data(MethodData* trap_mdo,
                                          int trap_bci,
                                          Deoptimization::DeoptReason reason,
+                                         Method* compiled_method,
                                          //outputs:
                                          uint& ret_this_trap_count,
                                          bool& ret_maybe_prior_trap,
@@ -1645,9 +1659,16 @@
     // Find the profile data for this BCI.  If there isn't one,
     // try to allocate one from the MDO's set of spares.
     // This will let us detect a repeated trap at this point.
-    pdata = trap_mdo->allocate_bci_to_data(trap_bci);
+    pdata = trap_mdo->allocate_bci_to_data(trap_bci, reason_is_speculate(reason) ? compiled_method : NULL);

     if (pdata != NULL) {
+      if (reason_is_speculate(reason) && !pdata->is_SpeculativeTrapData()) {
+        if (LogCompilation && xtty != NULL) {
+          ttyLocker ttyl;
+          // no more room for speculative traps in this MDO
+          xtty->elem("speculative_traps_oom");
+        }
+      }
       // Query the trap state of this profile datum.
       int tstate0 = pdata->trap_state();
       if (!trap_state_has_reason(tstate0, per_bc_reason))
@@ -1685,8 +1706,10 @@
   uint ignore_this_trap_count;
   bool ignore_maybe_prior_trap;
   bool ignore_maybe_prior_recompile;
+  assert(!reason_is_speculate(reason), "reason speculate only used by compiler");
   query_update_method_data(trap_mdo, trap_bci,
                            (DeoptReason)reason,
+                           NULL,
                            ignore_this_trap_count,
                            ignore_maybe_prior_trap,
                            ignore_maybe_prior_recompile);
@@ -1814,7 +1837,9 @@
   "div0_check",
   "age",
   "predicate",
-  "loop_limit_check"
+  "loop_limit_check",
+  "speculate_class_check",
+  "rtm_state_change"
 };
 const char* Deoptimization::_trap_action_name[Action_LIMIT] = {
   // Note:  Keep this in sync. with enum DeoptAction.
--- a/src/share/vm/runtime/deoptimization.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/runtime/deoptimization.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -59,6 +59,8 @@
     Reason_age,                   // nmethod too old; tier threshold reached
     Reason_predicate,             // compiler generated predicate failed
     Reason_loop_limit_check,      // compiler generated loop limits check failed
+    Reason_speculate_class_check, // saw unexpected object class from type speculation
+    Reason_rtm_state_change,      // rtm state change detected
     Reason_LIMIT,
     // Note:  Keep this enum in sync. with _trap_reason_name.
     Reason_RECORDED_LIMIT = Reason_bimorphic  // some are not recorded per bc
@@ -311,10 +313,23 @@
       return reason;
     else if (reason == Reason_div0_check) // null check due to divide-by-zero?
       return Reason_null_check;           // recorded per BCI as a null check
+    else if (reason == Reason_speculate_class_check)
+      return Reason_class_check;
     else
       return Reason_none;
   }

+  static bool reason_is_speculate(int reason) {
+    if (reason == Reason_speculate_class_check) {
+      return true;
+    }
+    return false;
+  }
+
+  static uint per_method_trap_limit(int reason) {
+    return reason_is_speculate(reason) ? (uint)PerMethodSpecTrapLimit : (uint)PerMethodTrapLimit;
+  }
+
   static const char* trap_reason_name(int reason);
   static const char* trap_action_name(int action);
   // Format like reason='foo' action='bar' index='123'.
@@ -337,6 +352,7 @@
   static ProfileData* query_update_method_data(MethodData* trap_mdo,
                                                int trap_bci,
                                                DeoptReason reason,
+                                               Method* compiled_method,
                                                //outputs:
                                                uint& ret_this_trap_count,
                                                bool& ret_maybe_prior_trap,
--- a/src/share/vm/runtime/globals.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/runtime/globals.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -3080,9 +3080,15 @@
   product(intx, PerMethodTrapLimit,  100,                                   \
           "Limit on traps (of one kind) in a method (includes inlines)")    \
                                                                             \
+  experimental(intx, PerMethodSpecTrapLimit,  5000,                         \
+          "Limit on speculative traps (of one kind) in a method (includes inlines)") \
+                                                                            \
   product(intx, PerBytecodeTrapLimit,  4,                                   \
           "Limit on traps (of one kind) at a particular BCI")               \
                                                                             \
+  experimental(intx, SpecTrapLimitExtraEntries,  3,                         \
+          "Extra method data trap entries for speculation")                 \
+                                                                            \
   develop(intx, InlineFrequencyRatio,    20,                                \
           "Ratio of call site execution to caller method invocation")       \
                                                                             \
@@ -3832,6 +3838,22 @@
   experimental(uintx, SymbolTableSize, defaultSymbolTableSize,              \
           "Number of buckets in the JVM internal Symbol table")             \
                                                                             \
+  product(bool, UseStringDeduplication, false,                              \
+          "Use string deduplication")                                       \
+                                                                            \
+  product(bool, PrintStringDeduplicationStatistics, false,                  \
+          "Print string deduplication statistics")                          \
+                                                                            \
+  product(uintx, StringDeduplicationAgeThreshold, 3,                        \
+          "A string must reach this age (or be promoted to an old region) " \
+          "to be considered for deduplication")                             \
+                                                                            \
+  diagnostic(bool, StringDeduplicationResizeALot, false,                    \
+          "Force table resize every time the table is scanned")             \
+                                                                            \
+  diagnostic(bool, StringDeduplicationRehashALot, false,                    \
+          "Force table rehash every time the table is scanned")             \
+                                                                            \
   develop(bool, TraceDefaultMethods, false,                                 \
           "Trace the default method processing steps")                      \
                                                                             \
--- a/src/share/vm/runtime/java.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/runtime/java.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -268,7 +268,7 @@
     os::print_statistics();
   }

-  if (PrintLockStatistics || PrintPreciseBiasedLockingStatistics) {
+  if (PrintLockStatistics || PrintPreciseBiasedLockingStatistics || PrintPreciseRTMLockingStatistics) {
     OptoRuntime::print_named_counters();
   }

@@ -390,7 +390,7 @@
   }

 #ifdef COMPILER2
-  if (PrintPreciseBiasedLockingStatistics) {
+  if (PrintPreciseBiasedLockingStatistics || PrintPreciseRTMLockingStatistics) {
     OptoRuntime::print_named_counters();
   }
 #endif
--- a/src/share/vm/runtime/mutexLocker.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/runtime/mutexLocker.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -58,6 +58,8 @@
 Mutex*   VtableStubs_lock             = NULL;
 Mutex*   SymbolTable_lock             = NULL;
 Mutex*   StringTable_lock             = NULL;
+Monitor* StringDedupQueue_lock        = NULL;
+Mutex*   StringDedupTable_lock        = NULL;
 Mutex*   CodeCache_lock               = NULL;
 Mutex*   MethodData_lock              = NULL;
 Mutex*   RetData_lock                 = NULL;
@@ -196,6 +198,9 @@
     def(MMUTracker_lock            , Mutex  , leaf     ,   true );
     def(HotCardCache_lock          , Mutex  , special  ,   true );
     def(EvacFailureStack_lock      , Mutex  , nonleaf  ,   true );
+
+    def(StringDedupQueue_lock      , Monitor, leaf,        true );
+    def(StringDedupTable_lock      , Mutex  , leaf,        true );
   }
   def(ParGCRareEvent_lock          , Mutex  , leaf     ,   true );
   def(DerivedPointerTableGC_lock   , Mutex,   leaf,        true );
--- a/src/share/vm/runtime/mutexLocker.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/runtime/mutexLocker.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -66,6 +66,8 @@
 extern Mutex*   VtableStubs_lock;                // a lock on the VtableStubs
 extern Mutex*   SymbolTable_lock;                // a lock on the symbol table
 extern Mutex*   StringTable_lock;                // a lock on the interned string table
+extern Monitor* StringDedupQueue_lock;           // a lock on the string deduplication queue
+extern Mutex*   StringDedupTable_lock;           // a lock on the string deduplication table
 extern Mutex*   CodeCache_lock;                  // a lock on the CodeCache, rank is special, use MutexLockerEx
 extern Mutex*   MethodData_lock;                 // a lock on installation of method data
 extern Mutex*   RetData_lock;                    // a lock on installation of RetData inside method data
--- a/src/share/vm/runtime/os.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/runtime/os.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -434,7 +434,10 @@
   static intx current_thread_id();
   static int current_process_id();
   static int sleep(Thread* thread, jlong ms, bool interruptable);
-  static int naked_sleep();
+  // Short standalone OS sleep suitable for slow path spin loop.
+  // Ignores Thread.interrupt() (so keep it short).
+  // ms = 0, will sleep for the least amount of time allowed by the OS.
+  static void naked_short_sleep(jlong ms);
   static void infinite_sleep(); // never returns, use with CAUTION
   static void yield();        // Yields to all threads with same priority
   enum YieldResult {
--- a/src/share/vm/runtime/park.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/runtime/park.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -59,58 +59,22 @@

   // Start by trying to recycle an existing but unassociated
   // ParkEvent from the global free list.
-  for (;;) {
-    ev = FreeList ;
-    if (ev == NULL) break ;
-    // 1: Detach - sequester or privatize the list
-    // Tantamount to ev = Swap (&FreeList, NULL)
-    if (Atomic::cmpxchg_ptr (NULL, &FreeList, ev) != ev) {
-       continue ;
+  // Using a spin lock since we are part of the mutex impl.
+  // 8028280: using concurrent free list without memory management can leak
+  // pretty badly it turns out.
+  Thread::SpinAcquire(&ListLock, "ParkEventFreeListAllocate");
+  {
+    ev = FreeList;
+    if (ev != NULL) {
+      FreeList = ev->FreeNext;
     }
-
-    // We've detached the list.  The list in-hand is now
-    // local to this thread.   This thread can operate on the
-    // list without risk of interference from other threads.
-    // 2: Extract -- pop the 1st element from the list.
-    ParkEvent * List = ev->FreeNext ;
-    if (List == NULL) break ;
-    for (;;) {
-        // 3: Try to reattach the residual list
-        guarantee (List != NULL, "invariant") ;
-        ParkEvent * Arv =  (ParkEvent *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ;
-        if (Arv == NULL) break ;
-
-        // New nodes arrived.  Try to detach the recent arrivals.
-        if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) {
-            continue ;
-        }
-        guarantee (Arv != NULL, "invariant") ;
-        // 4: Merge Arv into List
-        ParkEvent * Tail = List ;
-        while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ;
-        Tail->FreeNext = Arv ;
-    }
-    break ;
   }
+  Thread::SpinRelease(&ListLock);

   if (ev != NULL) {
     guarantee (ev->AssociatedWith == NULL, "invariant") ;
   } else {
     // Do this the hard way -- materialize a new ParkEvent.
-    // In rare cases an allocating thread might detach a long list --
-    // installing null into FreeList -- and then stall or be obstructed.
-    // A 2nd thread calling Allocate() would see FreeList == null.
-    // The list held privately by the 1st thread is unavailable to the 2nd thread.
-    // In that case the 2nd thread would have to materialize a new ParkEvent,
-    // even though free ParkEvents existed in the system.  In this case we end up
-    // with more ParkEvents in circulation than we need, but the race is
-    // rare and the outcome is benign.  Ideally, the # of extant ParkEvents
-    // is equal to the maximum # of threads that existed at any one time.
-    // Because of the race mentioned above, segments of the freelist
-    // can be transiently inaccessible.  At worst we may end up with the
-    // # of ParkEvents in circulation slightly above the ideal.
-    // Note that if we didn't have the TSM/immortal constraint, then
-    // when reattaching, above, we could trim the list.
     ev = new ParkEvent () ;
     guarantee ((intptr_t(ev) & 0xFF) == 0, "invariant") ;
   }
@@ -124,13 +88,14 @@
   if (ev == NULL) return ;
   guarantee (ev->FreeNext == NULL      , "invariant") ;
   ev->AssociatedWith = NULL ;
-  for (;;) {
-    // Push ev onto FreeList
-    // The mechanism is "half" lock-free.
-    ParkEvent * List = FreeList ;
-    ev->FreeNext = List ;
-    if (Atomic::cmpxchg_ptr (ev, &FreeList, List) == List) break ;
+  // Note that if we didn't have the TSM/immortal constraint, then
+  // when reattaching we could trim the list.
+  Thread::SpinAcquire(&ListLock, "ParkEventFreeListRelease");
+  {
+    ev->FreeNext = FreeList;
+    FreeList = ev;
   }
+  Thread::SpinRelease(&ListLock);
 }

 // Override operator new and delete so we can ensure that the
@@ -164,56 +129,21 @@

   // Start by trying to recycle an existing but unassociated
   // Parker from the global free list.
-  for (;;) {
-    p = FreeList ;
-    if (p  == NULL) break ;
-    // 1: Detach
-    // Tantamount to p = Swap (&FreeList, NULL)
-    if (Atomic::cmpxchg_ptr (NULL, &FreeList, p) != p) {
-       continue ;
+  // 8028280: using concurrent free list without memory management can leak
+  // pretty badly it turns out.
+  Thread::SpinAcquire(&ListLock, "ParkerFreeListAllocate");
+  {
+    p = FreeList;
+    if (p != NULL) {
+      FreeList = p->FreeNext;
     }
-
-    // We've detached the list.  The list in-hand is now
-    // local to this thread.   This thread can operate on the
-    // list without risk of interference from other threads.
-    // 2: Extract -- pop the 1st element from the list.
-    Parker * List = p->FreeNext ;
-    if (List == NULL) break ;
-    for (;;) {
-        // 3: Try to reattach the residual list
-        guarantee (List != NULL, "invariant") ;
-        Parker * Arv =  (Parker *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ;
-        if (Arv == NULL) break ;
-
-        // New nodes arrived.  Try to detach the recent arrivals.
-        if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) {
-            continue ;
-        }
-        guarantee (Arv != NULL, "invariant") ;
-        // 4: Merge Arv into List
-        Parker * Tail = List ;
-        while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ;
-        Tail->FreeNext = Arv ;
-    }
-    break ;
   }
+  Thread::SpinRelease(&ListLock);

   if (p != NULL) {
     guarantee (p->AssociatedWith == NULL, "invariant") ;
   } else {
     // Do this the hard way -- materialize a new Parker..
-    // In rare cases an allocating thread might detach
-    // a long list -- installing null into FreeList --and
-    // then stall.  Another thread calling Allocate() would see
-    // FreeList == null and then invoke the ctor.  In this case we
-    // end up with more Parkers in circulation than we need, but
-    // the race is rare and the outcome is benign.
-    // Ideally, the # of extant Parkers is equal to the
-    // maximum # of threads that existed at any one time.
-    // Because of the race mentioned above, segments of the
-    // freelist can be transiently inaccessible.  At worst
-    // we may end up with the # of Parkers in circulation
-    // slightly above the ideal.
     p = new Parker() ;
   }
   p->AssociatedWith = t ;          // Associate p with t
@@ -227,11 +157,12 @@
   guarantee (p->AssociatedWith != NULL, "invariant") ;
   guarantee (p->FreeNext == NULL      , "invariant") ;
   p->AssociatedWith = NULL ;
-  for (;;) {
-    // Push p onto FreeList
-    Parker * List = FreeList ;
-    p->FreeNext = List ;
-    if (Atomic::cmpxchg_ptr (p, &FreeList, List) == List) break ;
+
+  Thread::SpinAcquire(&ListLock, "ParkerFreeListRelease");
+  {
+    p->FreeNext = FreeList;
+    FreeList = p;
   }
+  Thread::SpinRelease(&ListLock);
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/runtime/rtmLocking.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_RTMLOCKING_HPP
+#define SHARE_VM_RUNTIME_RTMLOCKING_HPP
+
+// Generate RTM (Restricted Transactional Memory) locking code for all inflated
+// locks when "UseRTMLocking" option is on with normal locking mechanism as fall back
+// handler.
+//
+// On abort/lock busy the lock will be retried a fixed number of times under RTM
+// as specified by "RTMRetryCount" option. The locks which abort too often
+// can be auto tuned or manually tuned.
+//
+// Auto-tuning can be done on an option like UseRTMDeopt and it will need abort
+// ratio calculation for each lock. The abort ratio will be calculated after
+// "RTMAbortThreshold" number of aborts is reached. The formulas are:
+//
+//     Aborted transactions = abort_count * 100
+//     All transactions = total_count *  RTMTotalCountIncrRate
+//
+//     Aborted transactions >= All transactions * RTMAbortRatio
+//
+// If "UseRTMDeopt" is on and the aborts ratio reaches "RTMAbortRatio"
+// the method containing the lock will be deoptimized and recompiled with
+// all locks as normal locks. If the abort ratio continues to remain low after
+// "RTMLockingThreshold" locks are attempted, then the method will be deoptimized
+// and recompiled with all locks as RTM locks without abort ratio calculation code.
+// The abort ratio calculation can be delayed by specifying flag
+// -XX:RTMLockingCalculationDelay in millisecond.
+//
+// For manual tuning the abort statistics for each lock needs to be provided
+// to the user on some JVM option like "PrintPreciseRTMLockingStatistics".
+// Based on the abort statistics users can create a .hotspot_compiler file
+// or use -XX:CompileCommand=option,class::method,NoRTMLockEliding
+// to specify for which methods to disable RTM locking.
+//
+// When UseRTMForStackLocks option is enabled along with UseRTMLocking option,
+// the RTM locking code is generated for stack locks too.
+// The retries, auto-tuning support and rtm locking statistics are all
+// supported for stack locks just like inflated locks.
+
+// RTM locking counters
+class RTMLockingCounters VALUE_OBJ_CLASS_SPEC {
+ private:
+  uintx _total_count; // Total RTM locks count
+  uintx _abort_count; // Total aborts count
+
+ public:
+  enum { ABORT_STATUS_LIMIT = 6 };
+  // Counters per RTM Abort Status. Incremented with +PrintPreciseRTMLockingStatistics
+  // RTM uses the EAX register to communicate abort status to software.
+  // Following an RTM abort the EAX register has the following definition.
+  //
+  //   EAX register bit position   Meaning
+  //     0     Set if abort caused by XABORT instruction.
+  //     1     If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set.
+  //     2     Set if another logical processor conflicted with a memory address that was part of the transaction that aborted.
+  //     3     Set if an internal buffer overflowed.
+  //     4     Set if a debug breakpoint was hit.
+  //     5     Set if an abort occurred during execution of a nested transaction.
+ private:
+  uintx _abortX_count[ABORT_STATUS_LIMIT];
+
+ public:
+  static uintx _calculation_flag;
+  static uintx* rtm_calculation_flag_addr() { return &_calculation_flag; }
+
+  static void init();
+
+  RTMLockingCounters() : _total_count(0), _abort_count(0) {
+    for (int i = 0; i < ABORT_STATUS_LIMIT; i++) {
+      _abortX_count[i] = 0;
+    }
+  }
+
+  uintx* total_count_addr()               { return &_total_count; }
+  uintx* abort_count_addr()               { return &_abort_count; }
+  uintx* abortX_count_addr()              { return &_abortX_count[0]; }
+
+  static int total_count_offset()         { return (int)offset_of(RTMLockingCounters, _total_count); }
+  static int abort_count_offset()         { return (int)offset_of(RTMLockingCounters, _abort_count); }
+  static int abortX_count_offset()        { return (int)offset_of(RTMLockingCounters, _abortX_count[0]); }
+
+
+  bool nonzero() {  return (_abort_count + _total_count) > 0; }
+
+  void print_on(outputStream* st);
+  void print() { print_on(tty); }
+};
+
+#endif // SHARE_VM_RUNTIME_RTMLOCKING_HPP
--- a/src/share/vm/runtime/task.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/runtime/task.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -105,7 +105,6 @@
   _counter(0), _interval((int) interval_time) {
   // Sanity check the interval time
   assert(_interval >= PeriodicTask::min_interval &&
-         _interval <= PeriodicTask::max_interval &&
          _interval %  PeriodicTask::interval_gran == 0,
               "improper PeriodicTask interval time");
 }
--- a/src/share/vm/runtime/thread.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/runtime/thread.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -107,6 +107,9 @@
 #include "opto/c2compiler.hpp"
 #include "opto/idealGraphPrinter.hpp"
 #endif
+#if INCLUDE_RTM_OPT
+#include "runtime/rtmLocking.hpp"
+#endif

 #ifdef DTRACE_ENABLED

@@ -3673,6 +3676,10 @@

   BiasedLocking::init();

+#if INCLUDE_RTM_OPT
+  RTMLockingCounters::init();
+#endif
+
   if (JDK_Version::current().post_vm_init_hook_enabled()) {
     call_postVMInitHook(THREAD);
     // The Java side of PostVMInitHook.run must deal with all
@@ -4449,9 +4456,7 @@
         ++ctr ;
         if ((ctr & 0xFFF) == 0 || !os::is_MP()) {
            if (Yields > 5) {
-             // Consider using a simple NakedSleep() instead.
-             // Then SpinAcquire could be called by non-JVM threads
-             Thread::current()->_ParkEvent->park(1) ;
+             os::naked_short_sleep(1);
            } else {
              os::NakedYield() ;
              ++Yields ;
--- a/src/share/vm/runtime/vmStructs.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/runtime/vmStructs.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1946,15 +1946,6 @@
   declare_c2_type(CmpF3Node, CmpFNode)                                    \
   declare_c2_type(CmpDNode, CmpNode)                                      \
   declare_c2_type(CmpD3Node, CmpDNode)                                    \
-  declare_c2_type(MathExactNode, MultiNode)                               \
-  declare_c2_type(MathExactINode, MathExactNode)                          \
-  declare_c2_type(AddExactINode, MathExactINode)                          \
-  declare_c2_type(AddExactLNode, MathExactLNode)                          \
-  declare_c2_type(SubExactINode, MathExactINode)                          \
-  declare_c2_type(SubExactLNode, MathExactLNode)                          \
-  declare_c2_type(NegExactINode, MathExactINode)                          \
-  declare_c2_type(MulExactINode, MathExactINode)                          \
-  declare_c2_type(FlagsProjNode, ProjNode)                                \
   declare_c2_type(BoolNode, Node)                                         \
   declare_c2_type(AbsNode, Node)                                          \
   declare_c2_type(AbsINode, AbsNode)                                      \
@@ -2035,6 +2026,15 @@
   declare_c2_type(ExtractLNode, ExtractNode)                              \
   declare_c2_type(ExtractFNode, ExtractNode)                              \
   declare_c2_type(ExtractDNode, ExtractNode)                              \
+  declare_c2_type(OverflowNode, CmpNode)                                  \
+  declare_c2_type(OverflowINode, OverflowNode)                            \
+  declare_c2_type(OverflowAddINode, OverflowINode)                        \
+  declare_c2_type(OverflowSubINode, OverflowINode)                        \
+  declare_c2_type(OverflowMulINode, OverflowINode)                        \
+  declare_c2_type(OverflowLNode, OverflowNode)                            \
+  declare_c2_type(OverflowAddLNode, OverflowLNode)                        \
+  declare_c2_type(OverflowSubLNode, OverflowLNode)                        \
+  declare_c2_type(OverflowMulLNode, OverflowLNode)                        \
                                                                           \
   /*********************/                                                 \
   /* Adapter Blob Entries */                                              \
--- a/src/share/vm/trace/trace.xml	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/trace/trace.xml	Tue Mar 25 17:07:36 2014 -0700
@@ -193,11 +193,48 @@
     <event id="MetaspaceSummary" path="vm/gc/heap/metaspace_summary" label="Metaspace Summary" is_instant="true">
       <value type="UINT" field="gcId" label="GC ID" relation="GC_ID"/>
       <value type="GCWHEN" field="when" label="When" />
+      <value type="BYTES64" field="gcThreshold" label="GC Threshold" />
       <structvalue type="MetaspaceSizes" field="metaspace" label="Total"/>
       <structvalue type="MetaspaceSizes" field="dataSpace" label="Data"/>
       <structvalue type="MetaspaceSizes" field="classSpace" label="Class"/>
     </event>

+    <event id="MetaspaceGCThreshold" path="vm/gc/metaspace/gc_threshold" label="Metaspace GC Threshold" is_instant="true">
+      <value type="BYTES64" field="oldValue" label="Old Value" />
+      <value type="BYTES64" field="newValue" label="New Value" />
+      <value type="GCTHRESHOLDUPDATER" field="updater" label="Updater" />
+    </event>
+
+    <event id="MetaspaceAllocationFailure" path="vm/gc/metaspace/allocation_failure" label="Metaspace Allocation Failure" is_instant="true" has_stacktrace="true">
+      <value type="CLASS" field="classLoader" label="Class Loader" />
+      <value type="BOOLEAN" field="anonymousClassLoader" label="Anonymous Class Loader" />
+      <value type="BYTES64" field="size" label="Size" />
+      <value type="METADATATYPE" field="metadataType" label="Metadata Type" />
+      <value type="METASPACEOBJTYPE" field="metaspaceObjectType" label="Metaspace Object Type" />
+    </event>
+
+    <event id="MetaspaceOOM" path="vm/gc/metaspace/out_of_memory" label="Metaspace Out of Memory" is_instant="true" has_stacktrace="true">
+      <value type="CLASS" field="classLoader" label="Class Loader" />
+      <value type="BOOLEAN" field="anonymousClassLoader" label="Anonymous Class Loader" />
+      <value type="BYTES64" field="size" label="Size" />
+      <value type="METADATATYPE" field="metadataType" label="Metadata Type" />
+      <value type="METASPACEOBJTYPE" field="metaspaceObjectType" label="Metaspace Object Type" />
+    </event>
+
+    <event id="MetaspaceChunkFreeListSummary" path="vm/gc/metaspace/chunk_free_list_summary" label="Metaspace Chunk Free List Summary" is_instant="true">
+      <value type="UINT" field="gcId" label="GC ID" relation="GC_ID"/>
+      <value type="GCWHEN" field="when" label="When" />
+      <value type="METADATATYPE" field="metadataType" label="Metadata Type" />
+      <value type="ULONG" field="specializedChunks" label="Specialized Chunks" />
+      <value type="BYTES64" field="specializedChunksTotalSize" label="Specialized Chunks Total Size" />
+      <value type="ULONG" field="smallChunks" label="Small Chunks" />
+      <value type="BYTES64" field="smallChunksTotalSize" label="Small Chunks Total Size" />
+      <value type="ULONG" field="mediumChunks" label="Medium Chunks" />
+      <value type="BYTES64" field="mediumChunksTotalSize" label="Medium Chunks Total Size" />
+      <value type="ULONG" field="humongousChunks" label="Humongous Chunks" />
+      <value type="BYTES64" field="humongousChunksTotalSize" label="Humongous Chunks Total Size" />
+    </event>
+
     <event id="PSHeapSummary" path="vm/gc/heap/ps_summary" label="Parallel Scavenge Heap Summary" is_instant="true">
       <value type="UINT" field="gcId" label="GC ID" relation="GC_ID"/>
       <value type="GCWHEN" field="when" label="When" />
--- a/src/share/vm/trace/tracetypes.xml	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/trace/tracetypes.xml	Tue Mar 25 17:07:36 2014 -0700
@@ -130,11 +130,26 @@
       <value type="UTF8" field="type" label="type" />
     </content_type>

+    <content_type id="GCThresholdUpdater" hr_name="GC Treshold Updater"
+                  type="U1" jvm_type="GCTHRESHOLDUPDATER">
+      <value type="UTF8" field="updater" label="updater" />
+    </content_type>
+
     <content_type id="ReferenceType" hr_name="Reference Type"
                   type="U1" jvm_type="REFERENCETYPE">
       <value type="UTF8" field="type" label="type" />
     </content_type>

+    <content_type id="MetadataType" hr_name="Metadata Type"
+                  type="U1" jvm_type="METADATATYPE">
+      <value type="UTF8" field="type" label="type" />
+    </content_type>
+
+    <content_type id="MetaspaceObjectType" hr_name="Metaspace Object Type"
+                  type="U1" jvm_type="METASPACEOBJTYPE">
+      <value type="UTF8" field="type" label="type" />
+    </content_type>
+
     <content_type id="NARROW_OOP_MODE" hr_name="Narrow Oop Mode"
                   type="U1" jvm_type="NARROWOOPMODE">
       <value type="UTF8" field="mode" label="mode" />
@@ -324,10 +339,22 @@
     <primary_type symbol="G1YCTYPE" datatype="U1" contenttype="G1YCTYPE"
                   type="u1" sizeop="sizeof(u1)" />

+    <!-- GCTHRESHOLDUPDATER -->
+    <primary_type symbol="GCTHRESHOLDUPDATER" datatype="U1" contenttype="GCTHRESHOLDUPDATER"
+                  type="u1" sizeop="sizeof(u1)" />
+
     <!-- REFERENCETYPE -->
     <primary_type symbol="REFERENCETYPE" datatype="U1"
                   contenttype="REFERENCETYPE" type="u1" sizeop="sizeof(u1)" />

+    <!-- METADATATYPE -->
+    <primary_type symbol="METADATATYPE" datatype="U1"
+                  contenttype="METADATATYPE" type="u1" sizeop="sizeof(u1)" />
+
+    <!-- METADATAOBJTYPE -->
+    <primary_type symbol="METASPACEOBJTYPE" datatype="U1"
+                  contenttype="METASPACEOBJTYPE" type="u1" sizeop="sizeof(u1)" />
+
     <!-- NARROWOOPMODE -->
     <primary_type symbol="NARROWOOPMODE" datatype="U1"
                   contenttype="NARROWOOPMODE" type="u1" sizeop="sizeof(u1)" />
--- a/src/share/vm/utilities/globalDefinitions.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/utilities/globalDefinitions.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -373,6 +373,21 @@

 // Machine dependent stuff

+#if defined(X86) && defined(COMPILER2) && !defined(JAVASE_EMBEDDED)
+// Include Restricted Transactional Memory lock eliding optimization
+#define INCLUDE_RTM_OPT 1
+#define RTM_OPT_ONLY(code) code
+#else
+#define INCLUDE_RTM_OPT 0
+#define RTM_OPT_ONLY(code)
+#endif
+// States of Restricted Transactional Memory usage.
+enum RTMState {
+  NoRTM      = 0x2, // Don't use RTM
+  UseRTM     = 0x1, // Use RTM
+  ProfileRTM = 0x0  // Use RTM with abort ratio calculation
+};
+
 #ifdef TARGET_ARCH_x86
 # include "globalDefinitions_x86.hpp"
 #endif
--- a/src/share/vm/utilities/hashtable.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/utilities/hashtable.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -93,7 +93,7 @@
   return false;
 }

-template <class T, MEMFLAGS F> jint Hashtable<T, F>::_seed = 0;
+template <class T, MEMFLAGS F> juint Hashtable<T, F>::_seed = 0;

 // Create a new table and using alternate hash code, populate the new table
 // with the existing elements.   This can be used to change the hash code
--- a/src/share/vm/utilities/hashtable.hpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/utilities/hashtable.hpp	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -280,7 +280,7 @@
   // Function to move these elements into the new table.
   void move_to(Hashtable<T, F>* new_table);
   static bool use_alternate_hashcode()  { return _seed != 0; }
-  static jint seed()                    { return _seed; }
+  static juint seed()                    { return _seed; }

   static int literal_size(Symbol *symbol);
   static int literal_size(oop oop);
@@ -296,7 +296,7 @@
   void dump_table(outputStream* st, const char *table_name);

  private:
-  static jint _seed;
+  static juint _seed;
 };
--- a/src/share/vm/utilities/vmError.cpp	Tue Mar 25 12:54:21 2014 -0700
+++ b/src/share/vm/utilities/vmError.cpp	Tue Mar 25 17:07:36 2014 -0700
@@ -592,13 +592,24 @@
              st->cr();
              // Compiled code may use EBP register on x86 so it looks like
              // non-walkable C frame. Use frame.sender() for java frames.
-             if (_thread && _thread->is_Java_thread() && fr.is_java_frame()) {
-               RegisterMap map((JavaThread*)_thread, false); // No update
-               fr = fr.sender(&map);
-               continue;
+             if (_thread && _thread->is_Java_thread()) {
+               // Catch very first native frame by using stack address.
+               // For JavaThread stack_base and stack_size should be set.
+               if (!_thread->on_local_stack((address)(fr.sender_sp() + 1))) {
+                 break;
+               }
+               if (fr.is_java_frame()) {
+                 RegisterMap map((JavaThread*)_thread, false); // No update
+                 fr = fr.sender(&map);
+               } else {
+                 fr = os::get_sender_for_C_frame(&fr);
+               }
+             } else {
+               // is_first_C_frame() does only simple checks for frame pointer,
+               // it will pass if java compiled code has a pointer in EBP.
+               if (os::is_first_C_frame(&fr)) break;
+               fr = os::get_sender_for_C_frame(&fr);
              }
-             if (os::is_first_C_frame(&fr)) break;
-             fr = os::get_sender_for_C_frame(&fr);
           }

           if (count > StackPrintLimit) {
--- a/test/TEST.groups	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/TEST.groups	Tue Mar 25 17:07:36 2014 -0700
@@ -130,8 +130,12 @@
   gc/g1/TestHumongousAllocInitialMark.java \
   gc/arguments/TestG1HeapRegionSize.java \
   gc/metaspace/TestMetaspaceMemoryPool.java \
+  gc/arguments/TestDynMinHeapFreeRatio.java \
+  gc/arguments/TestDynMaxHeapFreeRatio.java \
   runtime/InternalApi/ThreadCpuTimesDeadlock.java \
-  serviceability/threads/TestFalseDeadLock.java
+  serviceability/threads/TestFalseDeadLock.java \
+  compiler/tiered/NonTieredLevelsTest.java \
+  compiler/tiered/TieredLevelsTest.java

 # Compact 2 adds full VM tests
 compact2 = \
--- a/test/compiler/ciReplay/TestVM.sh	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/ciReplay/TestVM.sh	Tue Mar 25 17:07:36 2014 -0700
@@ -78,8 +78,8 @@
         positive_test `expr $stop_level + 50` "TIERED LEVEL $stop_level :: REPLAY" \
                 "-XX:TieredStopAtLevel=$stop_level"
         stop_level=`expr $stop_level + 1`
+        cleanup
     done
-    cleanup
 fi

 echo TEST PASSED
--- a/test/compiler/ciReplay/common.sh	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/ciReplay/common.sh	Tue Mar 25 17:07:36 2014 -0700
@@ -99,14 +99,13 @@
 # $2 - non-tiered comp_level
 nontiered_tests() {
     level=`grep "^compile " $replay_data | awk '{print $6}'`
-    # is level available in non-tiere
+    # is level available in non-tiered
     if [ "$level" -eq $2 ]
     then
         positive_test $1 "NON-TIERED :: AVAILABLE COMP_LEVEL" \
                 -XX:-TieredCompilation
     else
         negative_test `expr $1 + 1` "NON-TIERED :: UNAVAILABLE COMP_LEVEL" \
-        negative_test `expr $1 + 1` "NON-TIERED :: UNAVAILABLE COMP_LEVEL" \
                 -XX:-TieredCompilation
     fi
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/codegen/BMI1.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8031321
+ * @summary Support BMI1 instructions on x86/x64
+ * @run main/othervm -Xbatch -XX:-TieredCompilation -XX:CompileCommand=compileonly,BMITests.* BMI1
+ *
+ */
+
+class MemI {
+  public int x;
+  public MemI(int x) { this.x = x; }
+}
+
+class MemL {
+  public long x;
+  public MemL(long x) { this.x = x; }
+}
+
+class BMITests {
+  static int andnl(int src1, int src2) {
+    return ~src1 & src2;
+  }
+  static long andnq(long src1, long src2) {
+    return ~src1 & src2;
+  }
+  static int andnl(int src1, MemI src2) {
+    return ~src1 & src2.x;
+  }
+  static long andnq(long src1, MemL src2) {
+    return ~src1 & src2.x;
+  }
+  static int blsil(int src1) {
+    return src1 & -src1;
+  }
+  static long blsiq(long src1) {
+    return src1 & -src1;
+  }
+  static int blsil(MemI src1) {
+    return src1.x & -src1.x;
+  }
+  static long blsiq(MemL src1) {
+    return src1.x & -src1.x;
+  }
+  static int blsmskl(int src1) {
+    return (src1 - 1) ^ src1;
+  }
+  static long blsmskq(long src1) {
+    return (src1 - 1) ^ src1;
+  }
+  static int blsmskl(MemI src1) {
+    return (src1.x - 1) ^ src1.x;
+  }
+  static long blsmskq(MemL src1) {
+    return (src1.x - 1) ^ src1.x;
+  }
+  static int blsrl(int src1) {
+    return (src1 - 1) & src1;
+  }
+  static long blsrq(long src1) {
+    return (src1 - 1) & src1;
+  }
+  static int blsrl(MemI src1) {
+    return (src1.x - 1) & src1.x;
+  }
+  static long blsrq(MemL src1) {
+    return (src1.x - 1) & src1.x;
+  }
+  static int lzcntl(int src1) {
+    return Integer.numberOfLeadingZeros(src1);
+  }
+  static int lzcntq(long src1) {
+    return Long.numberOfLeadingZeros(src1);
+  }
+  static int tzcntl(int src1) {
+    return Integer.numberOfTrailingZeros(src1);
+  }
+  static int tzcntq(long src1) {
+    return Long.numberOfTrailingZeros(src1);
+  }
+}
+
+public class BMI1 {
+  private final static int ITERATIONS = 1000000;
+
+  public static void main(String[] args) {
+    int ix = 0x01234567;
+    int iy = 0x89abcdef;
+    MemI imy = new MemI(iy);
+    long lx = 0x0123456701234567L;
+    long ly = 0x89abcdef89abcdefL;
+    MemL lmy = new MemL(ly);
+
+    { // match(Set dst (AndI (XorI src1 minus_1) src2))
+      int z = BMITests.andnl(ix, iy);
+      for (int i = 0; i < ITERATIONS; i++) {
+        int ii = BMITests.andnl(ix, iy);
+        if (ii != z) {
+          throw new Error("andnl with register failed");
+        }
+      }
+    }
+    { // match(Set dst (AndL (XorL src1 minus_1) src2))
+      long z = BMITests.andnq(lx, ly);
+      for (int i = 0; i < ITERATIONS; i++) {
+        long ll = BMITests.andnq(lx, ly);
+        if (ll != z) {
+          throw new Error("andnq with register failed");
+        }
+      }
+    }
+    { // match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)))
+      int z = BMITests.andnl(ix, imy);
+      for (int i = 0; i < ITERATIONS; i++) {
+        int ii = BMITests.andnl(ix, imy);
+        if (ii != z) {
+          throw new Error("andnl with memory failed");
+        }
+      }
+    }
+    { // match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)))
+      long z = BMITests.andnq(lx, lmy);
+      for (int i = 0; i < ITERATIONS; i++) {
+        long ll = BMITests.andnq(lx, lmy);
+        if (ll != z) {
+          throw new Error("andnq with memory failed");
+        }
+      }
+    }
+    { // match(Set dst (AndI (SubI imm_zero src) src))
+      int z = BMITests.blsil(ix);
+      for (int i = 0; i < ITERATIONS; i++) {
+        int ii = BMITests.blsil(ix);
+        if (ii != z) {
+          throw new Error("blsil with register failed");
+        }
+      }
+    }
+    { // match(Set dst (AndL (SubL imm_zero src) src))
+      long z = BMITests.blsiq(lx);
+      for (int i = 0; i < ITERATIONS; i++) {
+        long ll = BMITests.blsiq(lx);
+        if (ll != z) {
+          throw new Error("blsiq with register failed");
+        }
+      }
+    }
+    { // match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ))
+      int z = BMITests.blsil(imy);
+      for (int i = 0; i < ITERATIONS; i++) {
+        int ii = BMITests.blsil(imy);
+        if (ii != z) {
+          throw new Error("blsil with memory failed");
+        }
+      }
+    }
+    { // match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ))
+      long z = BMITests.blsiq(lmy);
+      for (int i = 0; i < ITERATIONS; i++) {
+        long ll = BMITests.blsiq(lmy);
+        if (ll != z) {
+          throw new Error("blsiq with memory failed");
+        }
+      }
+    }
+
+    { // match(Set dst (XorI (AddI src minus_1) src))
+      int z = BMITests.blsmskl(ix);
+      for (int i = 0; i < ITERATIONS; i++) {
+        int ii = BMITests.blsmskl(ix);
+        if (ii != z) {
+          throw new Error("blsmskl with register failed");
+        }
+      }
+    }
+    { // match(Set dst (XorL (AddL src minus_1) src))
+      long z = BMITests.blsmskq(lx);
+      for (int i = 0; i < ITERATIONS; i++) {
+        long ll = BMITests.blsmskq(lx);
+        if (ll != z) {
+          throw new Error("blsmskq with register failed");
+        }
+      }
+    }
+    { // match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) )
+      int z = BMITests.blsmskl(imy);
+      for (int i = 0; i < ITERATIONS; i++) {
+        int ii = BMITests.blsmskl(imy);
+        if (ii != z) {
+          throw new Error("blsmskl with memory failed");
+        }
+      }
+    }
+    {  // match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) )
+      long z = BMITests.blsmskq(lmy);
+      for (int i = 0; i < ITERATIONS; i++) {
+        long ll = BMITests.blsmskq(lmy);
+        if (ll != z) {
+          throw new Error("blsmskq with memory failed");
+        }
+      }
+    }
+
+    { //  match(Set dst (AndI (AddI src minus_1) src) )
+      int z = BMITests.blsrl(ix);
+      for (int i = 0; i < ITERATIONS; i++) {
+        int ii = BMITests.blsrl(ix);
+        if (ii != z) {
+          throw new Error("blsrl with register failed");
+        }
+      }
+    }
+    { // match(Set dst (AndL (AddL src minus_1) src) )
+      long z = BMITests.blsrq(lx);
+      for (int i = 0; i < ITERATIONS; i++) {
+        long ll = BMITests.blsrq(lx);
+        if (ll != z) {
+          throw new Error("blsrq with register failed");
+        }
+      }
+    }
+    { // match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) )
+      int z = BMITests.blsrl(imy);
+      for (int i = 0; i < ITERATIONS; i++) {
+        int ii = BMITests.blsrl(imy);
+        if (ii != z) {
+          throw new Error("blsrl with memory failed");
+        }
+      }
+    }
+    { // match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) )
+      long z = BMITests.blsrq(lmy);
+      for (int i = 0; i < ITERATIONS; i++) {
+        long ll = BMITests.blsrq(lmy);
+        if (ll != z) {
+          throw new Error("blsrq with memory failed");
+        }
+      }
+    }
+
+    {
+      int z = BMITests.lzcntl(ix);
+      for (int i = 0; i < ITERATIONS; i++) {
+        int ii = BMITests.lzcntl(ix);
+        if (ii != z) {
+          throw new Error("lzcntl failed");
+        }
+      }
+    }
+    {
+      int z = BMITests.lzcntq(lx);
+      for (int i = 0; i < ITERATIONS; i++) {
+        int ii = BMITests.lzcntq(lx);
+        if (ii != z) {
+          throw new Error("lzcntq failed");
+        }
+      }
+    }
+
+    {
+      int z = BMITests.tzcntl(ix);
+      for (int i = 0; i < ITERATIONS; i++) {
+        int ii = BMITests.tzcntl(ix);
+        if (ii != z) {
+          throw new Error("tzcntl failed");
+        }
+      }
+    }
+    {
+      int z = BMITests.tzcntq(lx);
+      for (int i = 0; i < ITERATIONS; i++) {
+        int ii = BMITests.tzcntq(lx);
+        if (ii != z) {
+          throw new Error("tzcntq failed");
+        }
+      }
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/bmi/BMITestRunner.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+import java.util.*;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.nio.charset.StandardCharsets;
+
+import com.oracle.java.testlibrary.*;
+
+/**
+ * Test runner that invokes all methods implemented by particular Expr
+ * with random arguments in two different JVM processes and compares output.
+ * JVMs being started in different modes - one in int and other in comp
+ * with C2 and disabled tiered compilation.
+ */
+public class BMITestRunner {
+
+    enum VMMode {
+        COMP, INT;
+    };
+
+    public static int DEFAULT_ITERATIONS_COUNT = 4000;
+
+    /**
+     * Execute all methods implemented by <b>expr</b> in int and comp modes
+     * and compare output.
+     * Test pass only of output obtained with different VM modes is equal.
+     * To control behaviour of test following options could be passed:
+     * <ul>
+     *   <li>-iterations=&lt;N&gt; each operation implemented by
+     *       <b>expr</b> will be executed <i>N</i> times. Default value
+     *       is 4000.</li>
+     *   <li>-seed=&lt;SEED&gt; arguments for <b>expr</b>'s methods
+     *       obtained via RNG initiated with seed <i>SEED</i>. By default
+     *       some random seed will be used.</li>
+     * </ul>
+     *
+     * @param expr operation that should be tested
+     * @param testOpts options to control test behaviour
+     * @param additionalVMOpts additional options for VM
+     *
+     * @throws Throwable if test failed.
+     */
+    public static void runTests(Class<? extends Expr> expr,
+                                String testOpts[],
+                                String... additionalVMOpts)
+                         throws Throwable {
+
+        int seed = new Random().nextInt();
+        int iterations = DEFAULT_ITERATIONS_COUNT;
+
+        for (String testOption : testOpts) {
+            if (testOption.startsWith("-iterations=")) {
+                iterations = Integer.valueOf(testOption.
+                                             replace("-iterations=", ""));
+            } else if (testOption.startsWith("-seed=")) {
+                seed = Integer.valueOf(testOption.replace("-seed=", ""));
+            }
+        }
+
+        System.out.println("Running test with seed: " + seed);
+
+        OutputAnalyzer intOutput = runTest(expr, VMMode.INT,
+                                           additionalVMOpts,
+                                           seed, iterations);
+        OutputAnalyzer compOutput = runTest(expr, VMMode.COMP,
+                                            additionalVMOpts,
+                                            seed, iterations);
+
+        dumpOutput(intOutput, "int");
+        dumpOutput(compOutput, "comp");
+
+        Asserts.assertStringsEqual(intOutput.getStdout(),
+                                   compOutput.getStdout(),
+                                   "Results obtained in -Xint and " +
+                                   "-Xcomp should be the same.");
+    }
+
+    /**
+     * Execute tests on methods implemented by <b>expr</b> in new VM
+     * started in <b>testVMMode</b> mode.
+     *
+     * @param expr operation that should be tested
+     * @param testVMMode VM mode for test
+     * @param additionalVMOpts additional options for VM
+     * @param seed for RNG used it tests
+     * @param iterations that will be used to invoke <b>expr</b>'s methods.
+     *
+     * @return OutputAnalyzer for executed test.
+     * @throws Throwable when something goes wrong.
+     */
+    public static OutputAnalyzer runTest(Class<? extends Expr> expr,
+                                         VMMode testVMMode,
+                                         String additionalVMOpts[],
+                                         int seed, int iterations)
+                                  throws Throwable {
+
+        List<String> vmOpts = new LinkedList<String>();
+
+        Collections.addAll(vmOpts, additionalVMOpts);
+
+        //setup mode-specific options
+        switch (testVMMode) {
+        case INT:
+            Collections.addAll(vmOpts, new String[] { "-Xint" });
+            break;
+        case COMP:
+            Collections.addAll(vmOpts, new String[] {
+                    "-Xcomp",
+                    "-XX:-TieredCompilation",
+                    String.format("-XX:CompileCommand=compileonly,%s::*",
+                                  expr.getName())
+                });
+            break;
+        }
+
+        Collections.addAll(vmOpts, new String[] {
+                "-XX:+DisplayVMOutputToStderr",
+                Executor.class.getName(),
+                expr.getName(),
+                new Integer(seed).toString(),
+                new Integer(iterations).toString()
+            });
+
+        OutputAnalyzer outputAnalyzer = ProcessTools.
+            executeTestJvm(vmOpts.toArray(new String[vmOpts.size()]));
+
+        outputAnalyzer.shouldHaveExitValue(0);
+
+        return outputAnalyzer;
+    }
+
+    /**
+     * Dump stdout and stderr of test process to <i>prefix</i>.test.out
+     * and <i>prefix</i>.test.err respectively.
+     *
+     * @param outputAnalyzer OutputAnalyzer whom output should be dumped
+     * @param prefix Prefix that will be used in file names.
+     * @throws IOException if unable to dump output to file.
+     */
+    protected static void dumpOutput(OutputAnalyzer outputAnalyzer,
+                                     String prefix)
+                              throws IOException {
+        Files.write(Paths.get(prefix + ".test.out"),
+                    outputAnalyzer.getStdout().getBytes());
+
+        Files.write(Paths.get(prefix + ".test.err"),
+                    outputAnalyzer.getStderr().getBytes());
+    }
+
+
+    /**
+     * Executor that invoke all methods implemented by particular
+     * Expr instance.
+     */
+    public static class Executor {
+
+        /**
+         * Usage: BMITestRunner$Executor <ExprClassName> <seed> <iterations>
+         */
+        public static void main(String args[]) throws Exception {
+            @SuppressWarnings("unchecked")
+            Class<? extends Expr> exprClass =
+                (Class<? extends Expr>)Class.forName(args[0]);
+            Expr expr = exprClass.getConstructor().newInstance();
+            Random rng = new Random(Integer.valueOf(args[1]));
+            int iterations = Integer.valueOf(args[2]);
+            runTests(expr, iterations, rng);
+        }
+
+
+        public static int[] getIntBitShifts() {
+            //SIZE+1 shift is for zero.
+            int data[] = new int[Integer.SIZE+1];
+            for (int s = 0; s < data.length; s++) {
+                data[s] = 1<<s;
+            }
+            return data;
+        }
+
+        public static long[] getLongBitShifts() {
+            //SIZE+1 shift is for zero.
+            long data[] = new long[Long.SIZE+1];
+            for (int s = 0; s < data.length; s++) {
+                data[s] = 1L<<s;
+            }
+            return data;
+        }
+
+        public static void log(String format, Object... args) {
+            System.out.println(String.format(format, args));
+        }
+
+        public static void runTests(Expr expr, int iterations, Random rng) {
+            runUnaryIntRegTest(expr, iterations, rng);
+            runUnaryIntMemTest(expr, iterations, rng);
+            runUnaryLongRegTest(expr, iterations, rng);
+            runUnaryLongMemTest(expr, iterations, rng);
+            runBinaryRegRegIntTest(expr, iterations, rng);
+            runBinaryRegMemIntTest(expr, iterations, rng);
+            runBinaryMemRegIntTest(expr, iterations, rng);
+            runBinaryMemMemIntTest(expr, iterations, rng);
+            runBinaryRegRegLongTest(expr, iterations, rng);
+            runBinaryRegMemLongTest(expr, iterations, rng);
+            runBinaryMemRegLongTest(expr, iterations, rng);
+            runBinaryMemMemLongTest(expr, iterations, rng);
+        }
+
+        public static void runUnaryIntRegTest(Expr expr, int iterations,
+                                              Random rng) {
+            if (!(expr.isUnaryArgumentSupported()
+                  && expr.isIntExprSupported())) {
+                return;
+            }
+
+            for (int value : getIntBitShifts()) {
+                log("UnaryIntReg(0X%x) -> 0X%x",
+                    value, expr.intExpr(value));
+            }
+
+            for (int i = 0; i < iterations; i++) {
+                int value = rng.nextInt();
+                log("UnaryIntReg(0X%x) -> 0X%x",
+                    value, expr.intExpr(value));
+            }
+        }
+
+        public static void runUnaryIntMemTest(Expr expr, int iterations,
+                                              Random rng) {
+            if (!(expr.isUnaryArgumentSupported()
+                  && expr.isIntExprSupported()
+                  && expr.isMemExprSupported())) {
+                return;
+            }
+
+            for (int value : getIntBitShifts()) {
+                log("UnaryIntMem(0X%x) -> 0X%x",
+                    value, expr.intExpr(new Expr.MemI(value)));
+            }
+
+            for (int i = 0; i < iterations; i++) {
+                int value = rng.nextInt();
+                log("UnaryIntMem(0X%x) -> 0X%x",
+                    value, expr.intExpr(new Expr.MemI(value)));
+            }
+        }
+
+        public static void runUnaryLongRegTest(Expr expr, int iterations,
+                                               Random rng) {
+            if (!(expr.isUnaryArgumentSupported()
+                  && expr.isLongExprSupported())) {
+                return;
+            }
+
+            for (long value : getLongBitShifts()) {
+                log("UnaryLongReg(0X%x) -> 0X%x",
+                    value, expr.longExpr(value));
+            }
+
+            for (int i = 0; i < iterations; i++) {
+                long value = rng.nextLong();
+                log("UnaryLongReg(0X%x) -> 0X%x",
+                    value, expr.longExpr(value));
+            }
+        }
+
+        public static void runUnaryLongMemTest(Expr expr, int iterations,
+                                               Random rng) {
+            if (!(expr.isUnaryArgumentSupported()
+                  && expr.isLongExprSupported()
+                  && expr.isMemExprSupported())) {
+                return;
+            }
+
+            for (long value : getLongBitShifts()) {
+                log("UnaryLongMem(0X%x) -> 0X%x",
+                    value, expr.longExpr(new Expr.MemL(value)));
+            }
+
+            for (int i = 0; i < iterations; i++) {
+                long value = rng.nextLong();
+                log("UnaryLongMem(0X%x) -> 0X%x",
+                    value, expr.longExpr(new Expr.MemL(value)));
+            }
+        }
+
+        public static void runBinaryRegRegIntTest(Expr expr, int iterations,
+                                                  Random rng) {
+            if (!(expr.isIntExprSupported()
+                  && expr.isBinaryArgumentSupported())) {
+                return;
+            }
+
+            for (int i = 0; i < iterations; i++) {
+                int aValue = rng.nextInt();
+                int bValue = rng.nextInt();
+                log("BinaryIntRegReg(0X%x, 0X%x) -> 0X%x",
+                    aValue, bValue, expr.intExpr(aValue, bValue));
+            }
+        }
+
+        public static void runBinaryRegMemIntTest(Expr expr, int iterations,
+                                                  Random rng) {
+            if (!(expr.isIntExprSupported()
+                  && expr.isBinaryArgumentSupported()
+                  && expr.isMemExprSupported())) {
+                return;
+            }
+
+            for (int i = 0; i < iterations; i++) {
+                int aValue = rng.nextInt();
+                int bValue = rng.nextInt();
+                log("BinaryIntRegMem(0X%x, 0X%x) -> 0X%x", aValue, bValue,
+                    expr.intExpr(aValue, new Expr.MemI(bValue)));
+            }
+        }
+
+        public static void runBinaryMemRegIntTest(Expr expr, int iterations,
+                                                  Random rng) {
+            if (!(expr.isIntExprSupported()
+                  && expr.isBinaryArgumentSupported()
+                  && expr.isMemExprSupported())) {
+                return;
+            }
+
+            for (int i = 0; i < iterations; i++) {
+                int aValue = rng.nextInt();
+                int bValue = rng.nextInt();
+                log("BinaryIntMemReg(0X%x, 0X%x) -> 0X%x", aValue, bValue,
+                    expr.intExpr(new Expr.MemI(aValue), bValue));
+            }
+        }
+
+        public static void runBinaryMemMemIntTest(Expr expr, int iterations,
+                                                  Random rng) {
+            if (!(expr.isIntExprSupported()
+                  && expr.isBinaryArgumentSupported()
+                  && expr.isMemExprSupported())) {
+                return;
+            }
+
+            for (int i = 0; i < iterations; i++) {
+                int aValue = rng.nextInt();
+                int bValue = rng.nextInt();
+                log("BinaryIntMemMem(0X%x, 0X%x) -> 0X%x", aValue, bValue,
+                    expr.intExpr(new Expr.MemI(aValue),
+                                 new Expr.MemI(bValue)));
+            }
+        }
+
+        public static void runBinaryRegRegLongTest(Expr expr,
+                                                   int iterations,
+                                                   Random rng) {
+            if (!(expr.isLongExprSupported()
+                  && expr.isBinaryArgumentSupported())) {
+                return;
+            }
+
+            for (int i = 0; i < iterations; i++) {
+                long aValue = rng.nextLong();
+                long bValue = rng.nextLong();
+                log("BinaryLongRegReg(0X%x, 0X%x) -> 0X%x", aValue, bValue,
+                    expr.longExpr(aValue, bValue));
+            }
+        }
+
+        public static void runBinaryRegMemLongTest(Expr expr,
+                                                   int iterations,
+                                                   Random rng) {
+            if (!(expr.isLongExprSupported()
+                  && expr.isBinaryArgumentSupported()
+                  && expr.isMemExprSupported())) {
+                return;
+            }
+
+            for (int i = 0; i < iterations; i++) {
+                long aValue = rng.nextLong();
+                long bValue = rng.nextLong();
+                log("BinaryLongRegMem(0X%x, 0X%x) -> 0X%x", aValue, bValue,
+                    expr.longExpr(aValue, new Expr.MemL(bValue)));
+            }
+        }
+
+        public static void runBinaryMemRegLongTest(Expr expr,
+                                                   int iterations,
+                                                   Random rng) {
+            if (!(expr.isLongExprSupported()
+                  && expr.isBinaryArgumentSupported()
+                  && expr.isMemExprSupported())) {
+                return;
+            }
+
+            for (int i = 0; i < iterations; i++) {
+                long aValue = rng.nextLong();
+                long bValue = rng.nextLong();
+                log("BinaryLongMemReg(0X%x, 0X%x) -> 0X%x", aValue, bValue,
+                    expr.longExpr(new Expr.MemL(aValue), bValue));
+            }
+        }
+
+        public static void runBinaryMemMemLongTest(Expr expr,
+                                                   int iterations,
+                                                   Random rng) {
+            if (!(expr.isLongExprSupported()
+                  && expr.isBinaryArgumentSupported()
+                  && expr.isMemExprSupported())) {
+                return;
+            }
+
+            for (int i = 0; i < iterations; i++) {
+                long aValue = rng.nextLong();
+                long bValue = rng.nextLong();
+                log("BinaryLongMemMem(0X%x, 0X%x) -> 0X%x", aValue, bValue,
+                    expr.longExpr(new Expr.MemL(aValue),
+                                  new Expr.MemL(bValue)));
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/bmi/Expr.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * Expression that should be replaced by particular instrinsic
+ * or intruction during compilation.
+ */
+
+public abstract class Expr {
+
+    public static class MemI {
+        public MemI(int i) {
+            this.value = i;
+        }
+
+        public int value;
+    }
+
+    public static class MemL {
+        public MemL(long l) {
+            this.value = l;
+        }
+
+        public long value;
+    }
+
+    public boolean isUnaryArgumentSupported() {
+        return false;
+    }
+
+    public boolean isIntExprSupported() {
+        return false;
+    }
+
+    public boolean isBinaryArgumentSupported() {
+        return false;
+    }
+
+    public boolean isLongExprSupported() {
+        return false;
+    }
+
+    public boolean isMemExprSupported() {
+        return false;
+    }
+
+    public int intExpr(int reg) {
+        throw new UnsupportedOperationException();
+    }
+
+    public int intExpr(MemI mem) {
+        throw new UnsupportedOperationException();
+    }
+
+    public int intExpr(int a, int b) {
+        throw new UnsupportedOperationException();
+    }
+
+    public int intExpr(int a, MemI b) {
+        throw new UnsupportedOperationException();
+    }
+
+    public int intExpr(MemI a, int b) {
+        throw new UnsupportedOperationException();
+    }
+
+    public int intExpr(MemI a, MemI b) {
+        throw new UnsupportedOperationException();
+    }
+
+    public long longExpr(long reg) {
+        throw new UnsupportedOperationException();
+    }
+
+    public long longExpr(MemL mem) {
+        throw new UnsupportedOperationException();
+    }
+
+    public long longExpr(long a, long b) {
+        throw new UnsupportedOperationException();
+    }
+
+    public long longExpr(long a, MemL b) {
+        throw new UnsupportedOperationException();
+    }
+
+    public long longExpr(MemL a, long b) {
+        throw new UnsupportedOperationException();
+    }
+
+    public long longExpr(MemL a, MemL b) {
+        throw new UnsupportedOperationException();
+    }
+
+    public static class BMIExpr extends Expr {
+
+        public boolean isMemExprSupported() {
+            return true;
+        }
+    }
+
+    public static class BMIBinaryExpr extends BMIExpr {
+
+        public boolean isBinaryArgumentSupported() {
+            return true;
+        }
+
+    }
+
+    public static class BMIUnaryExpr extends BMIExpr {
+        public boolean isUnaryArgumentSupported() {
+            return true;
+        }
+    }
+
+    public static class BMIBinaryIntExpr extends BMIBinaryExpr {
+        public boolean isIntExprSupported() {
+            return true;
+        }
+    }
+
+    public static class BMIBinaryLongExpr extends BMIBinaryExpr {
+        public boolean isLongExprSupported() {
+            return true;
+        }
+    }
+
+    public static class BMIUnaryIntExpr extends BMIUnaryExpr {
+        public boolean isIntExprSupported() {
+            return true;
+        }
+    }
+
+    public static class BMIUnaryLongExpr extends BMIUnaryExpr {
+        public boolean isLongExprSupported() {
+            return true;
+        }
+    }
+
+    public static class BitCountingExpr extends Expr {
+        public boolean isUnaryArgumentSupported() {
+            return true;
+        }
+    }
+
+    public static class BitCountingIntExpr extends BitCountingExpr {
+        public boolean isIntExprSupported() {
+            return true;
+        }
+    }
+
+    public static class BitCountingLongExpr extends BitCountingExpr {
+        public boolean isLongExprSupported() {
+            return true;
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/bmi/TestAndnI.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8031321
+ * @summary Verify that results of computations are the same w/
+ *          and w/o usage of ANDN instruction
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestAndnI BMITestRunner Expr
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI TestAndnI
+ */
+
+import sun.hotspot.cpuinfo.CPUInfo;
+
+public class TestAndnI {
+
+    public static void main(String args[]) throws Throwable {
+        if (!CPUInfo.hasFeature("bmi1")) {
+            System.out.println("CPU does not support bmi1 feature. "+
+                               "Test skipped.");
+            return;
+        }
+
+        BMITestRunner.runTests(AndnIExpr.class, args,
+                               "-XX:+UseBMI1Instructions");
+        BMITestRunner.runTests(AndnICommutativeExpr.class, args,
+                               "-XX:+UseBMI1Instructions");
+    }
+
+    public static class AndnIExpr extends Expr.BMIBinaryIntExpr {
+
+        public int intExpr(int src1, int src2) {
+            return ~src1 & src2;
+        }
+
+        public int intExpr(int src1, Expr.MemI src2) {
+            return ~src1 & src2.value;
+        }
+
+        public int intExpr(Expr.MemI src1, int src2) {
+            return ~src1.value & src2;
+        }
+
+        public int intExpr(Expr.MemI src1, Expr.MemI src2) {
+            return ~src1.value & src2.value;
+        }
+    }
+
+    public static class AndnICommutativeExpr extends Expr.BMIBinaryIntExpr {
+
+        public int intExpr(int src1, int src2) {
+            return src1 & ~src2;
+        }
+
+        public int intExpr(int src1, Expr.MemI src2) {
+            return src1 & ~src2.value;
+        }
+
+        public int intExpr(Expr.MemI src1, int src2) {
+            return src1.value & ~src2;
+        }
+
+        public int intExpr(Expr.MemI src1, Expr.MemI src2) {
+            return src1.value & ~src2.value;
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/bmi/TestAndnL.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8031321
+ * @summary Verify that results of computations are the same w/
+ *          and w/o usage of ANDN instruction
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestAndnL BMITestRunner Expr
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI TestAndnL
+ */
+
+import sun.hotspot.cpuinfo.CPUInfo;
+
+public class TestAndnL {
+
+    public static void main(String args[]) throws Throwable {
+        if (!CPUInfo.hasFeature("bmi1")) {
+            System.out.println("CPU does not support bmi1 feature. " +
+                               "Test skipped.");
+            return;
+        }
+
+        BMITestRunner.runTests(AndnLExpr.class, args,
+                               "-XX:+UseBMI1Instructions");
+        BMITestRunner.runTests(AndnLCommutativeExpr.class, args,
+                               "-XX:+UseBMI1Instructions");
+    }
+
+    public static class AndnLExpr extends Expr.BMIBinaryLongExpr {
+
+        public long longExpr(long src1, long src2) {
+            return ~src1 & src2;
+        }
+
+        public long longExpr(long src1, Expr.MemL src2) {
+            return ~src1 & src2.value;
+        }
+
+        public long longExpr(Expr.MemL src1, long src2) {
+            return ~src1.value & src2;
+        }
+
+        public long longExpr(Expr.MemL src1, Expr.MemL src2) {
+            return ~src1.value & src2.value;
+        }
+
+
+    }
+
+    public static class AndnLCommutativeExpr extends Expr.BMIBinaryLongExpr {
+
+        public long longExpr(long src1, long src2) {
+            return src1 & ~src2;
+        }
+
+        public long longExpr(long src1, Expr.MemL src2) {
+            return src1 & ~src2.value;
+        }
+
+        public long longExpr(Expr.MemL src1, long src2) {
+            return src1.value & ~src2;
+        }
+
+        public long longExpr(Expr.MemL src1, Expr.MemL src2) {
+            return src1.value & ~src2.value;
+        }
+
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/bmi/TestBlsiI.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8031321
+ * @summary Verify that results of computations are the same w/
+ *          and w/o usage of BLSI instruction
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestBlsiI BMITestRunner Expr
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI TestBlsiI
+ */
+
+import sun.hotspot.cpuinfo.CPUInfo;
+
+public class TestBlsiI {
+
+    public static void main(String args[]) throws Throwable {
+        if (!CPUInfo.hasFeature("bmi1")) {
+            System.out.println("CPU does not support bmi1 feature. " +
+                               "Test skipped.");
+            return;
+        }
+
+        BMITestRunner.runTests(BlsiIExpr.class, args,
+                               "-XX:+UseBMI1Instructions");
+        BMITestRunner.runTests(BlsiICommutativeExpr.class, args,
+                               "-XX:+UseBMI1Instructions");
+    }
+
+    public static class BlsiIExpr extends Expr.BMIUnaryIntExpr {
+
+        public int intExpr(int src) {
+            return -src & src;
+        }
+
+        public int intExpr(Expr.MemI src) {
+            return -src.value & src.value;
+        }
+
+    }
+
+    public static class BlsiICommutativeExpr extends Expr.BMIUnaryIntExpr {
+
+        public int intExpr(int src) {
+            return src & -src;
+        }
+
+        public int intExpr(Expr.MemI src) {
+            return src.value & -src.value;
+        }
+
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/bmi/TestBlsiL.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8031321
+ * @summary Verify that results of computations are the same w/
+ *          and w/o usage of BLSI instruction
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestBlsiL BMITestRunner Expr
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI TestBlsiL
+ */
+
+import sun.hotspot.cpuinfo.CPUInfo;
+
+public class TestBlsiL {
+
+    public static void main(String args[]) throws Throwable {
+        if (!CPUInfo.hasFeature("bmi1")) {
+            System.out.println("CPU does not support bmi1 feature. " +
+                               "Test skipped.");
+            return;
+        }
+
+        BMITestRunner.runTests(BlsiLExpr.class, args,
+                               "-XX:+UseBMI1Instructions");
+        BMITestRunner.runTests(BlsiLCommutativeExpr.class, args,
+                               "-XX:+UseBMI1Instructions");
+    }
+
+    public static class BlsiLExpr extends Expr.BMIUnaryLongExpr {
+
+        public long longExpr(long src) {
+            return -src & src;
+        }
+
+        public long longExpr(Expr.MemL src) {
+            return -src.value & src.value;
+        }
+
+    }
+
+    public static class BlsiLCommutativeExpr extends Expr.BMIUnaryLongExpr {
+
+        public long longExpr(long src) {
+            return src & -src;
+        }
+
+        public long longExpr(Expr.MemL src) {
+            return src.value & -src.value;
+        }
+
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/bmi/TestBlsmskI.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8031321
+ * @summary Verify that results of computations are the same w/
+ *          and w/o usage of BLSMSK instruction
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestBlsmskI BMITestRunner Expr
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI TestBlsmskI
+ */
+
+import sun.hotspot.cpuinfo.CPUInfo;
+
+public class TestBlsmskI {
+
+    public static void main(String args[]) throws Throwable {
+        if (!CPUInfo.hasFeature("bmi1")) {
+            System.out.println("CPU does not support bmi1 feature. " +
+                               "Test skipped.");
+            return;
+        }
+
+        BMITestRunner.runTests(BlsmskIExpr.class, args,
+                               "-XX:+UseBMI1Instructions");
+        BMITestRunner.runTests(BlsmskICommutativeExpr.class, args,
+                               "-XX:+UseBMI1Instructions");
+    }
+
+    public static class BlsmskIExpr extends Expr.BMIUnaryIntExpr {
+
+        public int intExpr(int src) {
+            return (src - 1) ^ src;
+        }
+
+        public int intExpr(Expr.MemI src) {
+            return (src.value - 1) ^ src.value;
+        }
+
+    }
+
+    public static class BlsmskICommutativeExpr extends Expr.BMIUnaryIntExpr {
+
+        public int intExpr(int src) {
+            return src ^ (src - 1);
+        }
+
+        public int intExpr(Expr.MemI src) {
+            return src.value ^ (src.value - 1);
+        }
+
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/bmi/TestBlsmskL.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8031321
+ * @summary Verify that results of computations are the same w/
+ *          and w/o usage of BLSMSK instruction
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestBlsmskL BMITestRunner Expr
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI TestBlsmskL
+ */
+
+import sun.hotspot.cpuinfo.CPUInfo;
+
+public class TestBlsmskL {
+
+    public static void main(String args[]) throws Throwable {
+        if (!CPUInfo.hasFeature("bmi1")) {
+            System.out.println("CPU does not support bmi1 feature. " +
+                               "Test skipped.");
+            return;
+        }
+
+        BMITestRunner.runTests(BlsmskLExpr.class, args,
+                               "-XX:+UseBMI1Instructions");
+        BMITestRunner.runTests(BlsmskLCommutativeExpr.class, args,
+                               "-XX:+UseBMI1Instructions");
+    }
+
+    public static class BlsmskLExpr
+        extends Expr.BMIUnaryLongExpr {
+
+        public long longExpr(long src) {
+            return (src - 1) ^ src;
+        }
+
+        public long longExpr(Expr.MemL src) {
+            return (src.value - 1) ^ src.value;
+        }
+
+    }
+
+    public static class BlsmskLCommutativeExpr
+        extends Expr.BMIUnaryLongExpr {
+
+        public long longExpr(long src) {
+            return src ^ (src - 1);
+        }
+
+        public long longExpr(Expr.MemL src) {
+            return src.value ^ (src.value - 1);
+        }
+
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/bmi/TestBlsrI.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8031321
+ * @summary Verify that results of computations are the same w/
+ *          and w/o usage of BLSR instruction
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestBlsrI BMITestRunner Expr
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI TestBlsrI
+ */
+
+import sun.hotspot.cpuinfo.CPUInfo;
+
+public class TestBlsrI {
+
+    public static void main(String args[]) throws Throwable {
+        if (!CPUInfo.hasFeature("bmi1")) {
+            System.out.println("CPU does not support bmi1 feature. " +
+                               "Test skipped.");
+            return;
+        }
+
+        BMITestRunner.runTests(BlsrIExpr.class, args,
+                               "-XX:+UseBMI1Instructions");
+        BMITestRunner.runTests(BlsrICommutativeExpr.class, args,
+                               "-XX:+UseBMI1Instructions");
+    }
+
+    public static class BlsrIExpr extends Expr.BMIUnaryIntExpr {
+
+        public int intExpr(int src) {
+            return (src - 1) & src;
+        }
+
+        public int intExpr(Expr.MemI src) {
+            return (src.value - 1) & src.value;
+        }
+
+    }
+
+    public static class BlsrICommutativeExpr extends Expr.BMIUnaryIntExpr {
+
+        public int intExpr(int src) {
+            return src & (src - 1);
+        }
+
+        public int intExpr(Expr.MemI src) {
+            return src.value & (src.value - 1);
+        }
+
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/bmi/TestBlsrL.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8031321
+ * @summary Verify that results of computations are the same w/
+ *          and w/o usage of BLSR instruction
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestBlsrL BMITestRunner Expr
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI TestBlsrL
+ */
+
+import sun.hotspot.cpuinfo.CPUInfo;
+
+public class TestBlsrL {
+
+    public static void main(String args[]) throws Throwable {
+        if (!CPUInfo.hasFeature("bmi1")) {
+            System.out.println("CPU does not support bmi1 feature. " +
+                               "Test skipped.");
+            return;
+        }
+
+        BMITestRunner.runTests(BlsrLExpr.class, args,
+                               "-XX:+UseBMI1Instructions");
+        BMITestRunner.runTests(BlsrLCommutativeExpr.class, args,
+                               "-XX:+UseBMI1Instructions");
+    }
+
+    public static class BlsrLExpr extends Expr.BMIUnaryLongExpr {
+
+        public long longExpr(long src) {
+            return (src - 1) & src;
+        }
+
+        public long longExpr(Expr.MemL src) {
+            return (src.value - 1) & src.value;
+        }
+
+    }
+
+    public static class BlsrLCommutativeExpr extends Expr.BMIUnaryLongExpr {
+
+        public long longExpr(long src) {
+            return src & (src - 1);
+        }
+
+        public long longExpr(Expr.MemL src) {
+            return src.value & (src.value - 1);
+        }
+
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/bmi/TestLzcntI.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8031321
+ * @summary Verify that results of computations are the same w/
+ *          and w/o usage of intrinsic
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestLzcntI BMITestRunner Expr
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI TestLzcntI
+ */
+
+import sun.hotspot.cpuinfo.CPUInfo;
+
+public class TestLzcntI {
+
+    public static void main(String args[]) throws Throwable {
+        if (!CPUInfo.hasFeature("lzcnt")) {
+            System.out.println("CPU does not support lzcnt feature. " +
+                               "Test skipped.");
+            return;
+        }
+
+        BMITestRunner.runTests(LzcntIExpr.class, args,
+                               "-XX:+UseCountLeadingZerosInstruction");
+    }
+
+    public static class LzcntIExpr extends Expr.BitCountingIntExpr {
+
+        public int intExpr(int src) {
+            return Integer.numberOfLeadingZeros(src);
+        }
+
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/bmi/TestLzcntL.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8031321
+ * @summary Verify that results of computations are the same w/
+ *          and w/o usage of intrinsic
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestLzcntL BMITestRunner Expr
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI TestLzcntL
+ */
+
+import sun.hotspot.cpuinfo.CPUInfo;
+
+public class TestLzcntL {
+
+    public static void main(String args[]) throws Throwable {
+        if (!CPUInfo.hasFeature("lzcnt")) {
+            System.out.println("CPU does not support lzcnt feature. " +
+                               "Test skipped.");
+            return;
+        }
+
+        BMITestRunner.runTests(LzcntLExpr.class, args,
+                               "-XX:+UseCountLeadingZerosInstruction");
+    }
+
+    public static class LzcntLExpr extends Expr.BitCountingLongExpr {
+
+        public long longExpr(long src) {
+            return Long.numberOfLeadingZeros(src);
+        }
+
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/bmi/TestTzcntI.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8031321
+ * @summary Verify that results of computations are the same w/
+ *          and w/o usage of intrinsic
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestTzcntI BMITestRunner Expr
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI TestTzcntI
+ */
+
+import sun.hotspot.cpuinfo.CPUInfo;
+
+public class TestTzcntI {
+
+    public static void main(String args[]) throws Throwable {
+        if (!CPUInfo.hasFeature("bmi1")) {
+            System.out.println("CPU does not support bmi1 feature. " +
+                               "Test skipped.");
+            return;
+        }
+
+        BMITestRunner.runTests(TzcntIExpr.class, args,
+                               "-XX:+UseCountTrailingZerosInstruction");
+    }
+
+    public static class TzcntIExpr extends Expr.BitCountingIntExpr {
+
+        public int intExpr(int src) {
+            return Integer.numberOfTrailingZeros(src);
+        }
+
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/bmi/TestTzcntL.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8031321
+ * @summary Verify that results of computations are the same w/
+ *          and w/o usage of intrinsic
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestTzcntL BMITestRunner Expr
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI TestTzcntL
+ */
+
+import sun.hotspot.cpuinfo.CPUInfo;
+
+public class TestTzcntL {
+
+    public static void main(String args[]) throws Throwable {
+        if (!CPUInfo.hasFeature("bmi1")) {
+            System.out.println("CPU does not support bmi1 feature. " +
+                               "Test skipped.");
+            return;
+        }
+
+        BMITestRunner.runTests(TzcntLExpr.class, args,
+                               "-XX:+UseCountTrailingZerosInstruction");
+    }
+
+    public static class TzcntLExpr extends Expr.BitCountingLongExpr {
+
+        public long longExpr(long src) {
+            return Long.numberOfTrailingZeros(src);
+        }
+
+    }
+
+}
--- a/test/compiler/intrinsics/mathexact/AddExactICondTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/AddExactICondTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8024924
  * @summary Test non constant addExact
  * @compile AddExactICondTest.java
- * @run main AddExactICondTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main AddExactICondTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/AddExactIConstantTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/AddExactIConstantTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8024924
  * @summary Test constant addExact
  * @compile AddExactIConstantTest.java Verify.java
- * @run main AddExactIConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main AddExactIConstantTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/AddExactILoadTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/AddExactILoadTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8024924
  * @summary Test non constant addExact
  * @compile AddExactILoadTest.java Verify.java
- * @run main AddExactILoadTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main AddExactILoadTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/AddExactILoopDependentTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/AddExactILoopDependentTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8024924
  * @summary Test non constant addExact
  * @compile AddExactILoopDependentTest.java Verify.java
- * @run main AddExactILoopDependentTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main AddExactILoopDependentTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/AddExactINonConstantTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/AddExactINonConstantTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8024924
  * @summary Test non constant addExact
  * @compile AddExactINonConstantTest.java Verify.java
- * @run main AddExactINonConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main AddExactINonConstantTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/AddExactIRepeatTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/AddExactIRepeatTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8025657
  * @summary Test repeating addExact
  * @compile AddExactIRepeatTest.java Verify.java
- * @run main AddExactIRepeatTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main AddExactIRepeatTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/AddExactLConstantTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/AddExactLConstantTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test constant addExact
  * @compile AddExactLConstantTest.java Verify.java
- * @run main AddExactLConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main AddExactLConstantTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/AddExactLNonConstantTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/AddExactLNonConstantTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test non constant addExact
  * @compile AddExactLNonConstantTest.java Verify.java
- * @run main AddExactLNonConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main AddExactLNonConstantTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/CompareTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/CompareTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026722
  * @summary Verify that the compare after addExact is a signed compare
  * @compile CompareTest.java
- * @run main CompareTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main CompareTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/DecExactITest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/DecExactITest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test decrementExact
  * @compile DecExactITest.java Verify.java
- * @run main DecExactITest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main DecExactITest
  *
  */
--- a/test/compiler/intrinsics/mathexact/DecExactLTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/DecExactLTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test decrementExact
  * @compile DecExactLTest.java Verify.java
- * @run main DecExactLTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main DecExactLTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/GVNTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/GVNTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8028207
  * @summary Verify that GVN doesn't mess up the two addExacts
  * @compile GVNTest.java
- * @run main GVNTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main GVNTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/IncExactITest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/IncExactITest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test incrementExact
  * @compile IncExactITest.java Verify.java
- * @run main IncExactITest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main IncExactITest
  *
  */
--- a/test/compiler/intrinsics/mathexact/IncExactLTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/IncExactLTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test incrementExact
  * @compile IncExactLTest.java Verify.java
- * @run main IncExactLTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main IncExactLTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/MulExactICondTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/MulExactICondTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test multiplyExact as condition
  * @compile MulExactICondTest.java
- * @run main MulExactICondTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main MulExactICondTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/MulExactIConstantTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/MulExactIConstantTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test constant multiplyExact
  * @compile MulExactIConstantTest.java Verify.java
- * @run main MulExactIConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main MulExactIConstantTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/MulExactILoadTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/MulExactILoadTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test multiplyExact
  * @compile MulExactILoadTest.java Verify.java
- * @run main MulExactILoadTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main MulExactILoadTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/MulExactILoopDependentTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/MulExactILoopDependentTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test loop dependent multiplyExact
  * @compile MulExactILoopDependentTest.java Verify.java
- * @run main MulExactILoopDependentTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main MulExactILoopDependentTest
  *
  */
 public class MulExactILoopDependentTest {
--- a/test/compiler/intrinsics/mathexact/MulExactINonConstantTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/MulExactINonConstantTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test non constant multiplyExact
  * @compile MulExactINonConstantTest.java Verify.java
- * @run main MulExactINonConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main MulExactINonConstantTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/MulExactIRepeatTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/MulExactIRepeatTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test repeating multiplyExact
  * @compile MulExactIRepeatTest.java Verify.java
- * @run main MulExactIRepeatTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main MulExactIRepeatTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/MulExactLConstantTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/MulExactLConstantTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test constant mulExact
  * @compile MulExactLConstantTest.java Verify.java
- * @run main MulExactLConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main MulExactLConstantTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/MulExactLNonConstantTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/MulExactLNonConstantTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test non constant mulExact
  * @compile MulExactLNonConstantTest.java Verify.java
- * @run main MulExactLNonConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main MulExactLNonConstantTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/NegExactIConstantTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/NegExactIConstantTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test constant negExact
  * @compile NegExactIConstantTest.java Verify.java
- * @run main NegExactIConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main NegExactIConstantTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/NegExactILoadTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/NegExactILoadTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,14 +26,14 @@
  * @bug 8026844
  * @summary Test negExact
  * @compile NegExactILoadTest.java Verify.java
- * @run main NegExactILoadTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main NegExactILoadTest
  *
  */

 public class NegExactILoadTest {
     public static void main(String[] args) {
-        Verify.LoadTest.init();
-        Verify.LoadTest.verify(new Verify.UnaryToBinary(new Verify.NegExactI()));
+      Verify.LoadTest.init();
+      Verify.LoadTest.verify(new Verify.UnaryToBinary(new Verify.NegExactI()));
     }

 }
--- a/test/compiler/intrinsics/mathexact/NegExactILoopDependentTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/NegExactILoopDependentTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test negExact loop dependent
  * @compile NegExactILoopDependentTest.java Verify.java
- * @run main NegExactILoopDependentTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main NegExactILoopDependentTest
  *
  */
 public class NegExactILoopDependentTest {
--- a/test/compiler/intrinsics/mathexact/NegExactINonConstantTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/NegExactINonConstantTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test non constant negExact
  * @compile NegExactINonConstantTest.java Verify.java
- * @run main NegExactINonConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main NegExactINonConstantTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/NegExactLConstantTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/NegExactLConstantTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test constant negExact
  * @compile NegExactLConstantTest.java Verify.java
- * @run main NegExactLConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main NegExactLConstantTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/NegExactLNonConstantTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/NegExactLNonConstantTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test constant negExact
  * @compile NegExactLNonConstantTest.java Verify.java
- * @run main NegExactLNonConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main NegExactLNonConstantTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/NestedMathExactTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/NestedMathExactTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8027444
  * @summary Test nested loops
  * @compile NestedMathExactTest.java
- * @run main NestedMathExactTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main NestedMathExactTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/SplitThruPhiTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/SplitThruPhiTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8028198
  * @summary Verify that split through phi does the right thing
  * @compile SplitThruPhiTest.java
- * @run main SplitThruPhiTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main SplitThruPhiTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/SubExactICondTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/SubExactICondTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test subtractExact as condition
  * @compile SubExactICondTest.java Verify.java
- * @run main SubExactICondTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main SubExactICondTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/SubExactIConstantTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/SubExactIConstantTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test constant subtractExact
  * @compile SubExactIConstantTest.java Verify.java
- * @run main SubExactIConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main SubExactIConstantTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/SubExactILoadTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/SubExactILoadTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test non constant subtractExact
  * @compile SubExactILoadTest.java Verify.java
- * @run main SubExactILoadTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main SubExactILoadTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/SubExactILoopDependentTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/SubExactILoopDependentTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test non constant subtractExact
  * @compile SubExactILoopDependentTest.java Verify.java
- * @run main SubExactILoopDependentTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main SubExactILoopDependentTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/SubExactINonConstantTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/SubExactINonConstantTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test non constant subtractExact
  * @compile SubExactINonConstantTest.java Verify.java
- * @run main SubExactINonConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main SubExactINonConstantTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/SubExactIRepeatTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/SubExactIRepeatTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,7 +26,7 @@
  * @bug 8026844
  * @summary Test repeating subtractExact
  * @compile SubExactIRepeatTest.java Verify.java
- * @run main SubExactIRepeatTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main SubExactIRepeatTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/SubExactLConstantTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/SubExactLConstantTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -27,7 +27,7 @@
  * @bug 8027353
  * @summary Test constant subtractExact
  * @compile SubExactLConstantTest.java Verify.java
- * @run main SubExactLConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main SubExactLConstantTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/SubExactLNonConstantTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/SubExactLNonConstantTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -27,7 +27,7 @@
  * @bug 8027353
  * @summary Test non constant subtractExact
  * @compile SubExactLNonConstantTest.java Verify.java
- * @run main SubExactLNonConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics
+ * @run main SubExactLNonConstantTest
  *
  */
--- a/test/compiler/intrinsics/mathexact/Verify.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/Verify.java	Tue Mar 25 17:07:36 2014 -0700
@@ -160,6 +160,7 @@

     public static class NonConstantTest {
         public static java.util.Random rnd = new java.util.Random();
+        public static int[] values = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE };

         public static void verify(BinaryMethod method) {
             for (int i = 0; i < 50000; ++i) {
@@ -169,6 +170,10 @@
                 Verify.verifyBinary(rnd1 + 1, rnd2, method);
                 Verify.verifyBinary(rnd1 - 1, rnd2, method);
                 Verify.verifyBinary(rnd1, rnd2 - 1, method);
+                Verify.verifyBinary(0, values[0], method);
+                Verify.verifyBinary(values[0], 0, method);
+                Verify.verifyBinary(0, values[1], method);
+                Verify.verifyBinary(values[1], 0, method);
             }
         }
     }
--- a/test/compiler/intrinsics/mathexact/sanity/AddExactIntTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/sanity/AddExactIntTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,11 +26,11 @@
  * @library /testlibrary /testlibrary/whitebox /compiler/whitebox
  * @build AddExactIntTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics AddExactIntTest
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics AddExactIntTest
--- a/test/compiler/intrinsics/mathexact/sanity/AddExactLongTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/sanity/AddExactLongTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,11 +26,11 @@
  * @library /testlibrary /testlibrary/whitebox /compiler/whitebox
  * @build AddExactLongTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics AddExactLongTest
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics AddExactLongTest
--- a/test/compiler/intrinsics/mathexact/sanity/DecrementExactIntTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/sanity/DecrementExactIntTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,11 +26,11 @@
  * @library /testlibrary /testlibrary/whitebox /compiler/whitebox
  * @build DecrementExactIntTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics DecrementExactIntTest
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics DecrementExactIntTest
--- a/test/compiler/intrinsics/mathexact/sanity/DecrementExactLongTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/sanity/DecrementExactLongTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,11 +26,11 @@
  * @library /testlibrary /testlibrary/whitebox /compiler/whitebox
  * @build DecrementExactLongTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics DecrementExactLongTest
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics DecrementExactLongTest
@@ -42,4 +42,4 @@
     public static void main(String[] args) throws Exception {
         new IntrinsicBase.LongTest(MathIntrinsic.LongIntrinsic.Decrement).test();
     }
-}
\ No newline at end of file
+}
--- a/test/compiler/intrinsics/mathexact/sanity/IncrementExactIntTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/sanity/IncrementExactIntTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,11 +26,11 @@
  * @library /testlibrary /testlibrary/whitebox /compiler/whitebox
  * @build IncrementExactIntTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics IncrementExactIntTest
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics IncrementExactIntTest
--- a/test/compiler/intrinsics/mathexact/sanity/IncrementExactLongTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/sanity/IncrementExactLongTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,11 +26,11 @@
  * @library /testlibrary /testlibrary/whitebox /compiler/whitebox
  * @build IncrementExactLongTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics IncrementExactLongTest
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics IncrementExactLongTest
--- a/test/compiler/intrinsics/mathexact/sanity/MultiplyExactIntTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/sanity/MultiplyExactIntTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,11 +26,11 @@
  * @library /testlibrary /testlibrary/whitebox /compiler/whitebox
  * @build MultiplyExactIntTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics MultiplyExactIntTest
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics MultiplyExactIntTest
--- a/test/compiler/intrinsics/mathexact/sanity/MultiplyExactLongTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/sanity/MultiplyExactLongTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,11 +26,11 @@
  * @library /testlibrary /testlibrary/whitebox /compiler/whitebox
  * @build MultiplyExactLongTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics MultiplyExactLongTest
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics MultiplyExactLongTest
--- a/test/compiler/intrinsics/mathexact/sanity/NegateExactIntTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/sanity/NegateExactIntTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,11 +26,11 @@
  * @library /testlibrary /testlibrary/whitebox /compiler/whitebox
  * @build NegateExactIntTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics NegateExactIntTest
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics NegateExactIntTest
--- a/test/compiler/intrinsics/mathexact/sanity/NegateExactLongTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/sanity/NegateExactLongTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,11 +26,11 @@
  * @library /testlibrary /testlibrary/whitebox /compiler/whitebox
  * @build NegateExactLongTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics NegateExactLongTest
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics NegateExactLongTest
--- a/test/compiler/intrinsics/mathexact/sanity/SubtractExactIntTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/sanity/SubtractExactIntTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,11 +26,11 @@
  * @library /testlibrary /testlibrary/whitebox /compiler/whitebox
  * @build SubtractExactIntTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics SubtractExactIntTest
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics SubtractExactIntTest
--- a/test/compiler/intrinsics/mathexact/sanity/SubtractExactLongTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/intrinsics/mathexact/sanity/SubtractExactLongTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -26,11 +26,11 @@
  * @library /testlibrary /testlibrary/whitebox /compiler/whitebox
  * @build SubtractExactLongTest
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics SubtractExactLongTest
- * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
  *                   -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation
  *                   -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod
  *                   -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics SubtractExactLongTest
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/unsafe/UnsafeGetAddressTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 6653795
+ * @summary C2 intrinsic for Unsafe.getAddress performs pointer sign extension on 32-bit systems
+ * @run main UnsafeGetAddressTest
+ *
+ */
+
+import sun.misc.Unsafe;
+import java.lang.reflect.*;
+
+public class UnsafeGetAddressTest {
+    private static Unsafe unsafe;
+
+    public static void main(String[] args) throws Exception {
+        Class c = UnsafeGetAddressTest.class.getClassLoader().loadClass("sun.misc.Unsafe");
+        Field f = c.getDeclaredField("theUnsafe");
+        f.setAccessible(true);
+        unsafe = (Unsafe)f.get(c);
+
+        long address = unsafe.allocateMemory(unsafe.addressSize());
+        unsafe.putAddress(address, 0x0000000080000000L);
+        // from sun.misc.Unsafe.getAddress' documentation:
+        // "If the native pointer is less than 64 bits wide, it is
+        // extended as an unsigned number to a Java long."
+        result = unsafe.getAddress(address);
+        System.out.printf("1: was 0x%x, expected 0x%x\n", result,
+                0x0000000080000000L);
+        for (int i = 0; i < 1000000; i++) {
+            result = unsafe.getAddress(address);
+        }
+
+        // The code has got compiled, check the result now
+        System.out.printf("2: was 0x%x, expected 0x%x\n", result,
+                0x0000000080000000L);
+        if (result != 0x0000000080000000L) {
+            System.out.println("Test Failed");
+            System.exit(97);
+        } else {
+            System.out.println("Test Passed");
+        }
+    }
+    static volatile long result;
+}
+
--- a/test/compiler/tiered/NonTieredLevelsTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/tiered/NonTieredLevelsTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -70,6 +70,9 @@

     @Override
     protected void test() throws Exception {
+        if (skipXcompOSR()) {
+          return;
+        }
         checkNotCompiled();
         compile();
         checkCompiled();
--- a/test/compiler/tiered/TieredLevelsTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/tiered/TieredLevelsTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,6 +51,9 @@

     @Override
     protected void test() throws Exception {
+        if (skipXcompOSR()) {
+          return;
+        }
         checkNotCompiled();
         compile();
         checkCompiled();
--- a/test/compiler/types/TestMeetTopArrayExactConstantArray.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/types/TestMeetTopArrayExactConstantArray.java	Tue Mar 25 17:07:36 2014 -0700
@@ -25,7 +25,7 @@
  * @test
  * @bug 8027571
  * @summary meet of TopPTR exact array with constant array is not symmetric
- * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-UseOnStackReplacement -XX:TypeProfileLevel=222 -XX:+UnlockExperimentalVMOptions -XX:+UseTypeSpeculation -XX:-BackgroundCompilation TestMeetTopArrayExactConstantArray
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-UseOnStackReplacement -XX:TypeProfileLevel=222 -XX:+UseTypeSpeculation -XX:-BackgroundCompilation TestMeetTopArrayExactConstantArray
  *
  */
--- a/test/compiler/types/TestSpeculationFailedHigherEqual.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/types/TestSpeculationFailedHigherEqual.java	Tue Mar 25 17:07:36 2014 -0700
@@ -25,7 +25,7 @@
  * @test
  * @bug 8027422
  * @summary type methods shouldn't always operate on speculative part
- * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:TypeProfileLevel=222 -XX:+UnlockExperimentalVMOptions -XX:+UseTypeSpeculation -XX:-BackgroundCompilation TestSpeculationFailedHigherEqual
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:TypeProfileLevel=222 -XX:+UseTypeSpeculation -XX:-BackgroundCompilation TestSpeculationFailedHigherEqual
  *
  */
--- a/test/compiler/types/TypeSpeculation.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/types/TypeSpeculation.java	Tue Mar 25 17:07:36 2014 -0700
@@ -25,7 +25,7 @@
  * @test
  * @bug 8024070
  * @summary Test that type speculation doesn't cause incorrect execution
- * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:TypeProfileLevel=222 -XX:+UnlockExperimentalVMOptions -XX:+UseTypeSpeculation TypeSpeculation
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:TypeProfileLevel=222 -XX:+UseTypeSpeculation TypeSpeculation
  *
  */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/uncommontrap/TestSpecTrapClassUnloading.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8031752
+ * @summary speculative traps need to be cleaned up at GC
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:+UseTypeSpeculation -XX:TypeProfileLevel=222 -XX:CompileCommand=exclude,java.lang.reflect.Method::invoke -XX:CompileCommand=exclude,sun.reflect.DelegatingMethodAccessorImpl::invoke -Xmx1M TestSpecTrapClassUnloading
+ *
+ */
+
+import java.lang.reflect.Method;
+
+public class TestSpecTrapClassUnloading {
+    static class B {
+        final public boolean m(Object o) {
+            if (o.getClass() == B.class) {
+                return true;
+            }
+            return false;
+        }
+    }
+
+    static class MemoryChunk {
+        MemoryChunk other;
+        long[] array;
+        MemoryChunk(MemoryChunk other) {
+            other = other;
+            array = new long[1024 * 1024 * 1024];
+        }
+    }
+
+    static void m1(B b, Object o) {
+        b.m(o);
+    }
+
+    static void m2(B b, Object o) {
+        b.m(o);
+    }
+
+    public static void main(String[] args) throws Exception {
+        Method m = B.class.getMethod("m", Object.class);
+        Object o = new Object();
+        B b = new B();
+
+        // add speculative trap in B.m() for m1
+        for (int i = 0; i < 20000; i++) {
+            m1(b, b);
+        }
+        m1(b, o);
+
+        // add speculative trap in B.m() for code generated by reflection
+        for (int i = 0; i < 20000; i++) {
+            m.invoke(b, b);
+        }
+        m.invoke(b, o);
+
+        m = null;
+
+        // add speculative trap in B.m() for m2
+        for (int i = 0; i < 20000; i++) {
+            m2(b, b);
+        }
+        m2(b, o);
+
+        // Exhaust memory which causes the code generated by
+        // reflection to be unloaded but B.m() is not.
+        MemoryChunk root = null;
+        try {
+            while (true) {
+                root = new MemoryChunk(root);
+            }
+        } catch(OutOfMemoryError e) {
+            root = null;
+        }
+    }
+}
--- a/test/compiler/whitebox/CompilerWhiteBoxTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/whitebox/CompilerWhiteBoxTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -380,6 +380,20 @@
         /** flag for OSR test case */
         boolean isOsr();
     }
+
+    /**
+     * @return {@code true} if the current test case is OSR and the mode is
+     *          Xcomp, otherwise {@code false}
+     */
+    protected boolean skipXcompOSR() {
+        boolean result =  testCase.isOsr()
+                && CompilerWhiteBoxTest.MODE.startsWith("compiled ");
+        if (result && IS_VERBOSE) {
+            System.err.printf("Warning: %s is not applicable in %s%n",
+                    testCase.name(), CompilerWhiteBoxTest.MODE);
+        }
+        return result;
+    }
 }

 enum SimpleTestCase implements CompilerWhiteBoxTest.TestCase {
--- a/test/compiler/whitebox/DeoptimizeAllTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/whitebox/DeoptimizeAllTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,11 +51,8 @@
      */
     @Override
     protected void test() throws Exception {
-        if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith(
-                "compiled ")) {
-          System.err.printf("Warning: %s is not applicable in %s%n",
-                testCase.name(), CompilerWhiteBoxTest.MODE);
-          return;
+        if (skipXcompOSR()) {
+            return;
         }
         compile();
         checkCompiled();
--- a/test/compiler/whitebox/DeoptimizeMethodTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/whitebox/DeoptimizeMethodTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,11 +51,8 @@
      */
     @Override
     protected void test() throws Exception {
-        if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith(
-                "compiled ")) {
-          System.err.printf("Warning: %s is not applicable in %s%n",
-                testCase.name(), CompilerWhiteBoxTest.MODE);
-          return;
+        if (skipXcompOSR()) {
+            return;
         }
         compile();
         checkCompiled();
--- a/test/compiler/whitebox/IsMethodCompilableTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/whitebox/IsMethodCompilableTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -66,10 +66,7 @@
      */
     @Override
     protected void test() throws Exception {
-        if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith(
-                "compiled ")) {
-          System.err.printf("Warning: %s is not applicable in %s%n",
-                testCase.name(), CompilerWhiteBoxTest.MODE);
+        if (skipXcompOSR()) {
           return;
         }
         if (!isCompilable()) {
--- a/test/compiler/whitebox/MakeMethodNotCompilableTest.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/compiler/whitebox/MakeMethodNotCompilableTest.java	Tue Mar 25 17:07:36 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -53,11 +53,8 @@
      */
     @Override
     protected void test() throws Exception {
-        if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith(
-                "compiled ")) {
-          System.err.printf("Warning: %s is not applicable in %s%n",
-                testCase.name(), CompilerWhiteBoxTest.MODE);
-          return;
+        if (skipXcompOSR()) {
+            return;
         }
         checkNotCompiled();
         if (!isCompilable()) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/arguments/TestDynMaxHeapFreeRatio.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test TestDynMaxHeapFreeRatio
+ * @bug 8028391
+ * @summary Verify that MaxHeapFreeRatio flag is manageable
+ * @library /testlibrary
+ * @run main TestDynMaxHeapFreeRatio
+ * @run main/othervm -XX:MinHeapFreeRatio=0 -XX:MaxHeapFreeRatio=100 TestDynMaxHeapFreeRatio
+ * @run main/othervm -XX:MinHeapFreeRatio=10 -XX:MaxHeapFreeRatio=50 -XX:-UseAdaptiveSizePolicy TestDynMaxHeapFreeRatio
+ * @run main/othervm -XX:MinHeapFreeRatio=10 -XX:MaxHeapFreeRatio=50 TestDynMaxHeapFreeRatio
+ * @run main/othervm -XX:MinHeapFreeRatio=51 -XX:MaxHeapFreeRatio=52 TestDynMaxHeapFreeRatio
+ * @run main/othervm -XX:MinHeapFreeRatio=75 -XX:MaxHeapFreeRatio=100 TestDynMaxHeapFreeRatio
+ */
+import com.oracle.java.testlibrary.TestDynamicVMOption;
+import com.oracle.java.testlibrary.DynamicVMOptionChecker;
+
+public class TestDynMaxHeapFreeRatio extends TestDynamicVMOption {
+
+    public static final String MinFreeRatioFlagName = "MinHeapFreeRatio";
+    public static final String MaxFreeRatioFlagName = "MaxHeapFreeRatio";
+
+    public TestDynMaxHeapFreeRatio() {
+        super(MaxFreeRatioFlagName);
+    }
+
+    public void test() {
+
+        int minHeapFreeValue = DynamicVMOptionChecker.getIntValue(MinFreeRatioFlagName);
+        System.out.println(MinFreeRatioFlagName + " = " + minHeapFreeValue);
+
+        testPercentageValues();
+
+        checkInvalidValue(Integer.toString(minHeapFreeValue - 1));
+        checkValidValue(Integer.toString(minHeapFreeValue));
+        checkValidValue("100");
+    }
+
+    public static void main(String args[]) throws Exception {
+        new TestDynMaxHeapFreeRatio().test();
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/arguments/TestDynMinHeapFreeRatio.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test TestDynMinHeapFreeRatio
+ * @bug 8028391
+ * @summary Verify that MinHeapFreeRatio flag is manageable
+ * @library /testlibrary
+ * @run main TestDynMinHeapFreeRatio
+ * @run main/othervm -XX:MinHeapFreeRatio=0 -XX:MaxHeapFreeRatio=100 TestDynMinHeapFreeRatio
+ * @run main/othervm -XX:MinHeapFreeRatio=10 -XX:MaxHeapFreeRatio=50 -XX:-UseAdaptiveSizePolicy TestDynMinHeapFreeRatio
+ * @run main/othervm -XX:MinHeapFreeRatio=10 -XX:MaxHeapFreeRatio=50 TestDynMinHeapFreeRatio
+ * @run main/othervm -XX:MinHeapFreeRatio=51 -XX:MaxHeapFreeRatio=52 TestDynMinHeapFreeRatio
+ * @run main/othervm -XX:MinHeapFreeRatio=75 -XX:MaxHeapFreeRatio=100 TestDynMinHeapFreeRatio
+ */
+import com.oracle.java.testlibrary.TestDynamicVMOption;
+import com.oracle.java.testlibrary.DynamicVMOptionChecker;
+
+public class TestDynMinHeapFreeRatio extends TestDynamicVMOption {
+
+    public static final String MinFreeRatioFlagName = "MinHeapFreeRatio";
+    public static final String MaxFreeRatioFlagName = "MaxHeapFreeRatio";
+
+    public TestDynMinHeapFreeRatio() {
+        super(MinFreeRatioFlagName);
+    }
+
+    public void test() {
+        int maxHeapFreeValue = DynamicVMOptionChecker.getIntValue(MaxFreeRatioFlagName);
+        System.out.println(MaxFreeRatioFlagName + " = " + maxHeapFreeValue);
+
+        testPercentageValues();
+
+        checkInvalidValue(Integer.toString(maxHeapFreeValue + 1));
+        checkValidValue(Integer.toString(maxHeapFreeValue));
+        checkValidValue("0");
+    }
+
+    public static void main(String args[]) throws Exception {
+        new TestDynMinHeapFreeRatio().test();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/g1/TestGCLogMessages.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestPrintGCDetails
+ * @bug 8035406 8027295 8035398
+ * @summary Ensure that the PrintGCDetails output for a minor GC with G1
+ * includes the expected necessary messages.
+ * @key gc
+ * @library /testlibrary
+ */
+
+import com.oracle.java.testlibrary.ProcessTools;
+import com.oracle.java.testlibrary.OutputAnalyzer;
+
+public class TestGCLogMessages {
+  public static void main(String[] args) throws Exception {
+    testNormalLogs();
+    testWithToSpaceExhaustionLogs();
+  }
+
+  private static void testNormalLogs() throws Exception {
+
+    ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
+                                                              "-Xmx10M",
+                                                              GCTest.class.getName());
+
+    OutputAnalyzer output = new OutputAnalyzer(pb.start());
+
+    output.shouldNotContain("[Redirty Cards");
+    output.shouldNotContain("[Code Root Purge");
+    output.shouldNotContain("[String Dedup Fixup");
+    output.shouldNotContain("[Young Free CSet");
+    output.shouldNotContain("[Non-Young Free CSet");
+    output.shouldHaveExitValue(0);
+
+    pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
+                                               "-XX:+UseStringDeduplication",
+                                               "-Xmx10M",
+                                               "-XX:+PrintGCDetails",
+                                               GCTest.class.getName());
+
+    output = new OutputAnalyzer(pb.start());
+
+    output.shouldContain("[Redirty Cards");
+    output.shouldContain("[Code Root Purge");
+    output.shouldContain("[String Dedup Fixup");
+    output.shouldNotContain("[Young Free CSet");
+    output.shouldNotContain("[Non-Young Free CSet");
+    output.shouldHaveExitValue(0);
+
+    pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
+                                               "-XX:+UseStringDeduplication",
+                                               "-Xmx10M",
+                                               "-XX:+PrintGCDetails",
+                                               "-XX:+UnlockExperimentalVMOptions",
+                                               "-XX:G1LogLevel=finest",
+                                               GCTest.class.getName());
+
+    output = new OutputAnalyzer(pb.start());
+
+    output.shouldContain("[Redirty Cards");
+    output.shouldContain("[Code Root Purge");
+    output.shouldContain("[String Dedup Fixup");
+    output.shouldContain("[Young Free CSet");
+    output.shouldContain("[Non-Young Free CSet");
+
+    // also check evacuation failure messages once
+    output.shouldNotContain("[Evacuation Failure");
+    output.shouldNotContain("[Recalculate Used");
+    output.shouldNotContain("[Remove Self Forwards");
+    output.shouldNotContain("[Restore RemSet");
+    output.shouldHaveExitValue(0);
+  }
+
+  private static void testWithToSpaceExhaustionLogs() throws Exception {
+    ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
+                                               "-Xmx10M",
+                                               "-Xmn5M",
+                                               "-XX:+PrintGCDetails",
+                                               GCTestWithToSpaceExhaustion.class.getName());
+
+    OutputAnalyzer output = new OutputAnalyzer(pb.start());
+    output.shouldContain("[Evacuation Failure");
+    output.shouldNotContain("[Recalculate Used");
+    output.shouldNotContain("[Remove Self Forwards");
+    output.shouldNotContain("[Restore RemSet");
+    output.shouldHaveExitValue(0);
+
+    pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
+                                               "-Xmx10M",
+                                               "-Xmn5M",
+                                               "-XX:+PrintGCDetails",
+                                               "-XX:+UnlockExperimentalVMOptions",
+                                               "-XX:G1LogLevel=finest",
+                                               GCTestWithToSpaceExhaustion.class.getName());
+
+    output = new OutputAnalyzer(pb.start());
+    output.shouldContain("[Evacuation Failure");
+    output.shouldContain("[Recalculate Used");
+    output.shouldContain("[Remove Self Forwards");
+    output.shouldContain("[Restore RemSet");
+    output.shouldHaveExitValue(0);
+  }
+
+  static class GCTest {
+    private static byte[] garbage;
+    public static void main(String [] args) {
+      System.out.println("Creating garbage");
+      // create 128MB of garbage. This should result in at least one GC
+      for (int i = 0; i < 1024; i++) {
+        garbage = new byte[128 * 1024];
+      }
+      System.out.println("Done");
+    }
+  }
+
+  static class GCTestWithToSpaceExhaustion {
+    private static byte[] garbage;
+    private static byte[] largeObject;
+    public static void main(String [] args) {
+      largeObject = new byte[5*1024*1024];
+      System.out.println("Creating garbage");
+      // create 128MB of garbage. This should result in at least one GC,
+      // some of them with to-space exhaustion.
+      for (int i = 0; i < 1024; i++) {
+        garbage = new byte[128 * 1024];
+      }
+      System.out.println("Done");
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/g1/TestStringDeduplicationAgeThreshold.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestStringDeduplicationAgeThreshold
+ * @summary Test string deduplication age threshold
+ * @bug 8029075
+ * @key gc
+ * @library /testlibrary
+ */
+
+public class TestStringDeduplicationAgeThreshold {
+    public static void main(String[] args) throws Exception {
+        TestStringDeduplicationTools.testAgeThreshold();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/g1/TestStringDeduplicationFullGC.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestStringDeduplicationFullGC
+ * @summary Test string deduplication during full GC
+ * @bug 8029075
+ * @key gc
+ * @library /testlibrary
+ */
+
+public class TestStringDeduplicationFullGC {
+    public static void main(String[] args) throws Exception {
+        TestStringDeduplicationTools.testFullGC();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/g1/TestStringDeduplicationInterned.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestStringDeduplicationInterned
+ * @summary Test string deduplication of interned strings
+ * @bug 8029075
+ * @key gc
+ * @library /testlibrary
+ */
+
+public class TestStringDeduplicationInterned {
+    public static void main(String[] args) throws Exception {
+        TestStringDeduplicationTools.testInterned();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/g1/TestStringDeduplicationMemoryUsage.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestStringDeduplicationMemoryUsage
+ * @summary Test string deduplication memory usage
+ * @bug 8029075
+ * @key gc
+ * @library /testlibrary
+ */
+
+public class TestStringDeduplicationMemoryUsage {
+    public static void main(String[] args) throws Exception {
+        TestStringDeduplicationTools.testMemoryUsage();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/g1/TestStringDeduplicationPrintOptions.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestStringDeduplicationPrintOptions
+ * @summary Test string deduplication print options
+ * @bug 8029075
+ * @key gc
+ * @library /testlibrary
+ */
+
+public class TestStringDeduplicationPrintOptions {
+    public static void main(String[] args) throws Exception {
+        TestStringDeduplicationTools.testPrintOptions();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/g1/TestStringDeduplicationTableRehash.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestStringDeduplicationTableRehash
+ * @summary Test string deduplication table rehash
+ * @bug 8029075
+ * @key gc
+ * @library /testlibrary
+ */
+
+public class TestStringDeduplicationTableRehash {
+    public static void main(String[] args) throws Exception {
+        TestStringDeduplicationTools.testTableRehash();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/g1/TestStringDeduplicationTableResize.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestStringDeduplicationTableResize
+ * @summary Test string deduplication table resize
+ * @bug 8029075
+ * @key gc
+ * @library /testlibrary
+ */
+
+public class TestStringDeduplicationTableResize {
+    public static void main(String[] args) throws Exception {
+        TestStringDeduplicationTools.testTableResize();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/g1/TestStringDeduplicationTools.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,512 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * Common code for string deduplication tests
+ */
+
+import java.lang.management.*;
+import java.lang.reflect.*;
+import java.security.*;
+import java.util.*;
+import com.oracle.java.testlibrary.*;
+import sun.misc.*;
+
+class TestStringDeduplicationTools {
+    private static final String YoungGC = "YoungGC";
+    private static final String FullGC  = "FullGC";
+
+    private static final int Xmn = 50;  // MB
+    private static final int Xms = 100; // MB
+    private static final int Xmx = 100; // MB
+    private static final int MB = 1024 * 1024;
+    private static final int StringLength = 50;
+
+    private static Field valueField;
+    private static Unsafe unsafe;
+    private static byte[] dummy;
+
+    static {
+        try {
+            Field field = Unsafe.class.getDeclaredField("theUnsafe");
+            field.setAccessible(true);
+            unsafe = (Unsafe)field.get(null);
+
+            valueField = String.class.getDeclaredField("value");
+            valueField.setAccessible(true);
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    private static Object getValue(String string) {
+        try {
+            return valueField.get(string);
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    private static void doFullGc(int numberOfTimes) {
+        for (int i = 0; i < numberOfTimes; i++) {
+            System.out.println("Begin: Full GC " + (i + 1) + "/" + numberOfTimes);
+            System.gc();
+            System.out.println("End: Full GC " + (i + 1) + "/" + numberOfTimes);
+        }
+    }
+
+    private static void doYoungGc(int numberOfTimes) {
+        // Provoke at least numberOfTimes young GCs
+        final int objectSize = 128;
+        final int maxObjectInYoung = (Xmn * MB) / objectSize;
+        for (int i = 0; i < numberOfTimes; i++) {
+            System.out.println("Begin: Young GC " + (i + 1) + "/" + numberOfTimes);
+            for (int j = 0; j < maxObjectInYoung + 1; j++) {
+                dummy = new byte[objectSize];
+            }
+            System.out.println("End: Young GC " + (i + 1) + "/" + numberOfTimes);
+        }
+    }
+
+    private static void forceDeduplication(int ageThreshold, String gcType) {
+        // Force deduplication to happen by either causing a FullGC or a YoungGC.
+        // We do several collections to also provoke a situation where the the
+        // deduplication thread needs to yield while processing the queue. This
+        // also tests that the references in the deduplication queue are adjusted
+        // accordingly.
+        if (gcType.equals(FullGC)) {
+            doFullGc(3);
+        } else {
+            doYoungGc(ageThreshold + 3);
+        }
+    }
+
+    private static String generateString(int id) {
+        StringBuilder builder = new StringBuilder(StringLength);
+
+        builder.append("DeduplicationTestString:" + id + ":");
+
+        while (builder.length() < StringLength) {
+            builder.append('X');
+        }
+
+        return builder.toString();
+    }
+
+    private static ArrayList<String> createStrings(int total, int unique) {
+        System.out.println("Creating strings: total=" + total + ", unique=" + unique);
+        if (total % unique != 0) {
+            throw new RuntimeException("Total must be divisible by unique");
+        }
+
+        ArrayList<String> list = new ArrayList<String>(total);
+        for (int j = 0; j < total / unique; j++) {
+            for (int i = 0; i < unique; i++) {
+                list.add(generateString(i));
+            }
+        }
+
+        return list;
+    }
+
+    private static void verifyStrings(ArrayList<String> list, int uniqueExpected) {
+        for (;;) {
+            // Check number of deduplicated strings
+            ArrayList<Object> unique = new ArrayList<Object>(uniqueExpected);
+            for (String string: list) {
+                Object value = getValue(string);
+                boolean uniqueValue = true;
+                for (Object obj: unique) {
+                    if (obj == value) {
+                        uniqueValue = false;
+                        break;
+                    }
+                }
+
+                if (uniqueValue) {
+                    unique.add(value);
+                }
+            }
+
+            System.out.println("Verifying strings: total=" + list.size() +
+                               ", uniqueFound=" + unique.size() +
+                               ", uniqueExpected=" + uniqueExpected);
+
+            if (unique.size() == uniqueExpected) {
+                System.out.println("Deduplication completed");
+                break;
+            } else {
+                System.out.println("Deduplication not completed, waiting...");
+
+                // Give the deduplication thread time to complete
+                try {
+                    Thread.sleep(1000);
+                } catch (Exception e) {
+                    throw new RuntimeException(e);
+                }
+            }
+        }
+    }
+
+    private static OutputAnalyzer runTest(String... extraArgs) throws Exception {
+        String[] defaultArgs = new String[] {
+            "-Xmn" + Xmn + "m",
+            "-Xms" + Xms + "m",
+            "-Xmx" + Xmx + "m",
+            "-XX:+UseG1GC",
+            "-XX:+UnlockDiagnosticVMOptions",
+            "-XX:+VerifyAfterGC" // Always verify after GC
+        };
+
+        ArrayList<String> args = new ArrayList<String>();
+        args.addAll(Arrays.asList(defaultArgs));
+        args.addAll(Arrays.asList(extraArgs));
+
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(args.toArray(new String[args.size()]));
+        OutputAnalyzer output = new OutputAnalyzer(pb.start());
+        System.err.println(output.getStderr());
+        System.out.println(output.getStdout());
+        return output;
+    }
+
+    private static class DeduplicationTest {
+        public static void main(String[] args) {
+            System.out.println("Begin: DeduplicationTest");
+
+            final int numberOfStrings = Integer.parseUnsignedInt(args[0]);
+            final int numberOfUniqueStrings = Integer.parseUnsignedInt(args[1]);
+            final int ageThreshold = Integer.parseUnsignedInt(args[2]);
+            final String gcType = args[3];
+
+            ArrayList<String> list = createStrings(numberOfStrings, numberOfUniqueStrings);
+            forceDeduplication(ageThreshold, gcType);
+            verifyStrings(list, numberOfUniqueStrings);
+
+            System.out.println("End: DeduplicationTest");
+        }
+
+        public static OutputAnalyzer run(int numberOfStrings, int ageThreshold, String gcType, String... extraArgs) throws Exception {
+            String[] defaultArgs = new String[] {
+                "-XX:+UseStringDeduplication",
+                "-XX:StringDeduplicationAgeThreshold=" + ageThreshold,
+                DeduplicationTest.class.getName(),
+                "" + numberOfStrings,
+                "" + numberOfStrings / 2,
+                "" + ageThreshold,
+                gcType
+            };
+
+            ArrayList<String> args = new ArrayList<String>();
+            args.addAll(Arrays.asList(extraArgs));
+            args.addAll(Arrays.asList(defaultArgs));
+
+            return runTest(args.toArray(new String[args.size()]));
+        }
+    }
+
+    private static class InternedTest {
+        public static void main(String[] args) {
+            // This test verifies that interned strings are always
+            // deduplicated when being interned, and never after
+            // being interned.
+
+            System.out.println("Begin: InternedTest");
+
+            final int ageThreshold = Integer.parseUnsignedInt(args[0]);
+            final String baseString = "DeduplicationTestString:" + InternedTest.class.getName();
+
+            // Create duplicate of baseString
+            StringBuilder sb1 = new StringBuilder(baseString);
+            String dupString1 = sb1.toString();
+            if (getValue(dupString1) == getValue(baseString)) {
+                throw new RuntimeException("Values should not match");
+            }
+
+            // Force baseString to be inspected for deduplication
+            // and be inserted into the deduplication hashtable.
+            forceDeduplication(ageThreshold, FullGC);
+
+            // Wait for deduplication to occur
+            while (getValue(dupString1) != getValue(baseString)) {
+                System.out.println("Waiting...");
+                try {
+                    Thread.sleep(100);
+                } catch (Exception e) {
+                    throw new RuntimeException(e);
+                }
+            }
+
+            // Create a new duplicate of baseString
+            StringBuilder sb2 = new StringBuilder(baseString);
+            String dupString2 = sb2.toString();
+            if (getValue(dupString2) == getValue(baseString)) {
+                throw new RuntimeException("Values should not match");
+            }
+
+            // Intern the new duplicate
+            Object beforeInternedValue = getValue(dupString2);
+            String internedString = dupString2.intern();
+            if (internedString != dupString2) {
+                throw new RuntimeException("String should match");
+            }
+            if (getValue(internedString) != getValue(baseString)) {
+                throw new RuntimeException("Values should match");
+            }
+
+            // Check original value of interned string, to make sure
+            // deduplication happened on the interned string and not
+            // on the base string
+            if (beforeInternedValue == getValue(baseString)) {
+                throw new RuntimeException("Values should not match");
+            }
+
+            System.out.println("End: InternedTest");
+        }
+
+        public static OutputAnalyzer run() throws Exception {
+            return runTest("-XX:+PrintGC",
+                           "-XX:+PrintGCDetails",
+                           "-XX:+UseStringDeduplication",
+                           "-XX:+PrintStringDeduplicationStatistics",
+                           "-XX:StringDeduplicationAgeThreshold=" + DefaultAgeThreshold,
+                           InternedTest.class.getName(),
+                           "" + DefaultAgeThreshold);
+        }
+    }
+
+    private static class MemoryUsageTest {
+        public static void main(String[] args) {
+            System.out.println("Begin: MemoryUsageTest");
+
+            final boolean useStringDeduplication = Boolean.parseBoolean(args[0]);
+            final int numberOfStrings = LargeNumberOfStrings;
+            final int numberOfUniqueStrings = 1;
+
+            ArrayList<String> list = createStrings(numberOfStrings, numberOfUniqueStrings);
+            forceDeduplication(DefaultAgeThreshold, FullGC);
+
+            if (useStringDeduplication) {
+                verifyStrings(list, numberOfUniqueStrings);
+            }
+
+            System.gc();
+            System.out.println("Heap Memory Usage: " + ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getUsed());
+
+            System.out.println("End: MemoryUsageTest");
+        }
+
+        public static OutputAnalyzer run(boolean useStringDeduplication) throws Exception {
+            String[] extraArgs = new String[0];
+
+            if (useStringDeduplication) {
+                extraArgs = new String[] {
+                    "-XX:+UseStringDeduplication",
+                    "-XX:+PrintStringDeduplicationStatistics",
+                    "-XX:StringDeduplicationAgeThreshold=" + DefaultAgeThreshold
+                };
+            }
+
+            String[] defaultArgs = new String[] {
+                "-XX:+PrintGC",
+                "-XX:+PrintGCDetails",
+                MemoryUsageTest.class.getName(),
+                "" + useStringDeduplication
+            };
+
+            ArrayList<String> args = new ArrayList<String>();
+            args.addAll(Arrays.asList(extraArgs));
+            args.addAll(Arrays.asList(defaultArgs));
+
+            return runTest(args.toArray(new String[args.size()]));
+        }
+    }
+
+    /*
+     * Tests
+     */
+
+    private static final int LargeNumberOfStrings = 10000;
+    private static final int SmallNumberOfStrings = 10;
+
+    private static final int MaxAgeThreshold      = 15;
+    private static final int DefaultAgeThreshold  = 3;
+    private static final int MinAgeThreshold      = 1;
+
+    private static final int TooLowAgeThreshold   = MinAgeThreshold - 1;
+    private static final int TooHighAgeThreshold  = MaxAgeThreshold + 1;
+
+    public static void testYoungGC() throws Exception {
+        // Do young GC to age strings to provoke deduplication
+        OutputAnalyzer output = DeduplicationTest.run(LargeNumberOfStrings,
+                                                      DefaultAgeThreshold,
+                                                      YoungGC,
+                                                      "-XX:+PrintGC",
+                                                      "-XX:+PrintStringDeduplicationStatistics");
+        output.shouldNotContain("Full GC");
+        output.shouldContain("GC pause (G1 Evacuation Pause) (young)");
+        output.shouldContain("GC concurrent-string-deduplication");
+        output.shouldContain("Deduplicated:");
+        output.shouldHaveExitValue(0);
+    }
+
+    public static void testFullGC() throws Exception {
+        // Do full GC to age strings to provoke deduplication
+        OutputAnalyzer output = DeduplicationTest.run(LargeNumberOfStrings,
+                                                      DefaultAgeThreshold,
+                                                      FullGC,
+                                                      "-XX:+PrintGC",
+                                                      "-XX:+PrintStringDeduplicationStatistics");
+        output.shouldNotContain("GC pause (G1 Evacuation Pause) (young)");
+        output.shouldContain("Full GC");
+        output.shouldContain("GC concurrent-string-deduplication");
+        output.shouldContain("Deduplicated:");
+        output.shouldHaveExitValue(0);
+    }
+
+    public static void testTableResize() throws Exception {
+        // Test with StringDeduplicationResizeALot
+        OutputAnalyzer output = DeduplicationTest.run(LargeNumberOfStrings,
+                                                      DefaultAgeThreshold,
+                                                      YoungGC,
+                                                      "-XX:+PrintGC",
+                                                      "-XX:+PrintStringDeduplicationStatistics",
+                                                      "-XX:+StringDeduplicationResizeALot");
+        output.shouldContain("GC concurrent-string-deduplication");
+        output.shouldContain("Deduplicated:");
+        output.shouldNotContain("Resize Count: 0");
+        output.shouldHaveExitValue(0);
+    }
+
+    public static void testTableRehash() throws Exception {
+        // Test with StringDeduplicationRehashALot
+        OutputAnalyzer output = DeduplicationTest.run(LargeNumberOfStrings,
+                                                      DefaultAgeThreshold,
+                                                      YoungGC,
+                                                      "-XX:+PrintGC",
+                                                      "-XX:+PrintStringDeduplicationStatistics",
+                                                      "-XX:+StringDeduplicationRehashALot");
+        output.shouldContain("GC concurrent-string-deduplication");
+        output.shouldContain("Deduplicated:");
+        output.shouldNotContain("Rehash Count: 0");
+        output.shouldNotContain("Hash Seed: 0x0");
+        output.shouldHaveExitValue(0);
+    }
+
+    public static void testAgeThreshold() throws Exception {
+        OutputAnalyzer output;
+
+        // Test with max age theshold
+        output = DeduplicationTest.run(SmallNumberOfStrings,
+                                       MaxAgeThreshold,
+                                       YoungGC,
+                                       "-XX:+PrintGC",
+                                       "-XX:+PrintStringDeduplicationStatistics");
+        output.shouldContain("GC concurrent-string-deduplication");
+        output.shouldContain("Deduplicated:");
+        output.shouldHaveExitValue(0);
+
+        // Test with min age theshold
+        output = DeduplicationTest.run(SmallNumberOfStrings,
+                                       MinAgeThreshold,
+                                       YoungGC,
+                                       "-XX:+PrintGC",
+                                       "-XX:+PrintStringDeduplicationStatistics");
+        output.shouldContain("GC concurrent-string-deduplication");
+        output.shouldContain("Deduplicated:");
+        output.shouldHaveExitValue(0);
+
+        // Test with too low age threshold
+        output = DeduplicationTest.run(SmallNumberOfStrings,
+                                       TooLowAgeThreshold,
+                                       YoungGC);
+        output.shouldContain("StringDeduplicationAgeThreshold of " + TooLowAgeThreshold +
+                             " is invalid; must be between " + MinAgeThreshold + " and " + MaxAgeThreshold);
+        output.shouldHaveExitValue(1);
+
+        // Test with too high age threshold
+        output = DeduplicationTest.run(SmallNumberOfStrings,
+                                       TooHighAgeThreshold,
+                                       YoungGC);
+        output.shouldContain("StringDeduplicationAgeThreshold of " + TooHighAgeThreshold +
+                             " is invalid; must be between " + MinAgeThreshold + " and " + MaxAgeThreshold);
+        output.shouldHaveExitValue(1);
+    }
+
+    public static void testPrintOptions() throws Exception {
+        OutputAnalyzer output;
+
+        // Test without PrintGC and without PrintStringDeduplicationStatistics
+        output = DeduplicationTest.run(SmallNumberOfStrings,
+                                       DefaultAgeThreshold,
+                                       YoungGC);
+        output.shouldNotContain("GC concurrent-string-deduplication");
+        output.shouldNotContain("Deduplicated:");
+        output.shouldHaveExitValue(0);
+
+        // Test with PrintGC but without PrintStringDeduplicationStatistics
+        output = DeduplicationTest.run(SmallNumberOfStrings,
+                                       DefaultAgeThreshold,
+                                       YoungGC,
+                                       "-XX:+PrintGC");
+        output.shouldContain("GC concurrent-string-deduplication");
+        output.shouldNotContain("Deduplicated:");
+        output.shouldHaveExitValue(0);
+    }
+
+    public static void testInterned() throws Exception {
+        // Test that interned strings are deduplicated before being interned
+        OutputAnalyzer output = InternedTest.run();
+        output.shouldHaveExitValue(0);
+    }
+
+    public static void testMemoryUsage() throws Exception {
+        // Test that memory usage is reduced after deduplication
+        OutputAnalyzer output;
+        final String usagePattern = "Heap Memory Usage: (\\d+)";
+
+        // Run without deduplication
+        output = MemoryUsageTest.run(false);
+        output.shouldHaveExitValue(0);
+        final long memoryUsageWithoutDedup = Long.parseLong(output.firstMatch(usagePattern, 1));
+
+        // Run with deduplication
+        output = MemoryUsageTest.run(true);
+        output.shouldHaveExitValue(0);
+        final long memoryUsageWithDedup = Long.parseLong(output.firstMatch(usagePattern, 1));
+
+        // Calculate expected memory usage with deduplication enabled. This calculation does
+        // not take alignment and padding into account, so it's a conservative estimate.
+        final long sizeOfChar = 2; // bytes
+        final long bytesSaved = (LargeNumberOfStrings - 1) * (StringLength * sizeOfChar + unsafe.ARRAY_CHAR_BASE_OFFSET);
+        final long memoryUsageWithDedupExpected = memoryUsageWithoutDedup - bytesSaved;
+
+        System.out.println("Memory usage summary:");
+        System.out.println("   memoryUsageWithoutDedup:      " + memoryUsageWithoutDedup);
+        System.out.println("   memoryUsageWithDedup:         " + memoryUsageWithDedup);
+        System.out.println("   memoryUsageWithDedupExpected: " + memoryUsageWithDedupExpected);
+
+        if (memoryUsageWithDedup > memoryUsageWithDedupExpected) {
+            throw new Exception("Unexpected memory usage, memoryUsageWithDedup should less or equal to memoryUsageWithDedupExpected");
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/g1/TestStringDeduplicationYoungGC.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestStringDeduplicationYoungGC
+ * @summary Test string deduplication during young GC
+ * @bug 8029075
+ * @key gc
+ * @library /testlibrary
+ */
+
+public class TestStringDeduplicationYoungGC {
+    public static void main(String[] args) throws Exception {
+        TestStringDeduplicationTools.testYoungGC();
+    }
+}
--- a/test/testlibrary/com/oracle/java/testlibrary/Asserts.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/testlibrary/com/oracle/java/testlibrary/Asserts.java	Tue Mar 25 17:07:36 2014 -0700
@@ -378,6 +378,64 @@
         }
     }

+    /**
+     * Asserts that two strings are equal.
+     *
+     * If strings are not equals, then exception message
+     * will contain {@code msg} followed by list of mismatched lines.
+     *
+     * @param str1 First string to compare.
+     * @param str2 Second string to compare.
+     * @param msg A description of the assumption.
+     * @throws RuntimeException if strings are not equal.
+     */
+    public static void assertStringsEqual(String str1, String str2,
+                                          String msg) {
+        String lineSeparator = System.getProperty("line.separator");
+        String str1Lines[] = str1.split(lineSeparator);
+        String str2Lines[] = str2.split(lineSeparator);
+
+        int minLength = Math.min(str1Lines.length, str2Lines.length);
+        String longestStringLines[] = ((str1Lines.length == minLength) ?
+                                       str2Lines : str1Lines);
+
+        boolean stringsAreDifferent = false;
+
+        StringBuilder messageBuilder = new StringBuilder(msg);
+
+        messageBuilder.append("\n");
+
+        for (int line = 0; line < minLength; line++) {
+            if (!str1Lines[line].equals(str2Lines[line])) {
+                messageBuilder.append(String.
+                                      format("[line %d] '%s' differs " +
+                                             "from '%s'\n",
+                                             line,
+                                             str1Lines[line],
+                                             str2Lines[line]));
+                stringsAreDifferent = true;
+            }
+        }
+
+        if (minLength < longestStringLines.length) {
+            String stringName = ((longestStringLines == str1Lines) ?
+                                 "first" : "second");
+            messageBuilder.append(String.format("Only %s string contains " +
+                                                "following lines:\n",
+                                                stringName));
+            stringsAreDifferent = true;
+            for(int line = minLength; line < longestStringLines.length; line++) {
+                messageBuilder.append(String.
+                                      format("[line %d] '%s'", line,
+                                             longestStringLines[line]));
+            }
+        }
+
+        if (stringsAreDifferent) {
+            error(messageBuilder.toString());
+        }
+    }
+
     private static <T extends Comparable<T>> int compare(T lhs, T rhs, String msg) {
         assertNotNull(lhs, msg);
         assertNotNull(rhs, msg);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/testlibrary/com/oracle/java/testlibrary/DynamicVMOptionChecker.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.java.testlibrary;
+
+import com.sun.management.HotSpotDiagnosticMXBean;
+import com.sun.management.VMOption;
+import java.lang.management.ManagementFactory;
+
+/**
+ * Simple class to check writeability, invalid and valid values for VMOption
+ */
+public class DynamicVMOptionChecker {
+
+    /**
+     * Reads VM option from PlatformMXBean and parse it to integer value
+     *
+     * @param name of option
+     * @return parsed value
+     */
+    public static int getIntValue(String name) {
+
+        VMOption option = ManagementFactory.
+                getPlatformMXBean(HotSpotDiagnosticMXBean.class).
+                getVMOption(name);
+
+        return Integer.parseInt(option.getValue());
+    }
+
+    /**
+     * Sets VM option value
+     *
+     * @param name of option
+     * @param value to set
+     */
+    public static void setIntValue(String name, int value) {
+        ManagementFactory.getPlatformMXBean(HotSpotDiagnosticMXBean.class).setVMOption(name, Integer.toString(value));
+    }
+
+    /**
+     * Checks that VM option is dynamically writable
+     *
+     * @param name
+     * @throws RuntimeException if option if not writable
+     * @return always true
+     */
+    public static boolean checkIsWritable(String name) {
+        VMOption option = ManagementFactory.
+                getPlatformMXBean(HotSpotDiagnosticMXBean.class).
+                getVMOption(name);
+
+        if (!option.isWriteable()) {
+            throw new RuntimeException(name + " is not writable");
+        }
+
+        return true;
+    }
+
+    /**
+     * Checks that value cannot be set
+     *
+     * @param name of flag
+     * @param value string representation of value to set
+     * @throws RuntimeException on error - when expected exception hasn't been thrown
+     */
+    public static void checkInvalidValue(String name, String value) {
+        // should throw
+        try {
+            ManagementFactory.
+                    getPlatformMXBean(HotSpotDiagnosticMXBean.class).
+                    setVMOption(name, value);
+
+        } catch (IllegalArgumentException e) {
+            return;
+        }
+
+        throw new RuntimeException("Expected IllegalArgumentException was not thrown, " + name + "= " + value);
+    }
+
+    /**
+     * Checks that value can be set
+     *
+     * @param name of flag to set
+     * @param value string representation of value to set
+     * @throws RuntimeException on error - when value in VM is not equal to origin
+     */
+    public static void checkValidValue(String name, String value) {
+        ManagementFactory.
+                getPlatformMXBean(HotSpotDiagnosticMXBean.class).
+                setVMOption(name, value);
+
+        VMOption option = ManagementFactory.
+                getPlatformMXBean(HotSpotDiagnosticMXBean.class).
+                getVMOption(name);
+
+        if (!option.getValue().equals(value)) {
+            throw new RuntimeException("Actual value of " + name + " \"" + option.getValue()
+                    + "\" not equal origin \"" + value + "\"");
+        }
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/testlibrary/com/oracle/java/testlibrary/TestDynamicVMOption.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.java.testlibrary;
+
+/**
+ * Simple class to check writeability, invalid and valid values for concrete VMOption
+ */
+public class TestDynamicVMOption {
+
+    private final String name;
+    private final int value;
+
+    /**
+     * Constructor
+     *
+     * @param name of VM option to test
+     */
+    public TestDynamicVMOption(String name) {
+        this.name = name;
+        this.value = DynamicVMOptionChecker.getIntValue(name);
+        System.out.println(this.name + " = " + this.value);
+    }
+
+    /**
+     * Checks that this value can accept valid percentage values and cannot accept invalid percentage values
+     *
+     * @throws RuntimeException
+     */
+    public void testPercentageValues() {
+        checkInvalidValue(Integer.toString(Integer.MIN_VALUE));
+        checkInvalidValue(Integer.toString(Integer.MAX_VALUE));
+        checkInvalidValue("-10");
+        checkInvalidValue("190");
+    }
+
+    /**
+     * Reads VM option from PlatformMXBean and parse it to integer value
+     *
+     * @return value
+     */
+    public int getIntValue() {
+        return DynamicVMOptionChecker.getIntValue(this.name);
+    }
+
+    /**
+     * Sets VM option value
+     *
+     * @param value to set
+     */
+    public void setIntValue(int value) {
+        DynamicVMOptionChecker.setIntValue(this.name, value);
+    }
+
+    /**
+     * Checks that this VM option is dynamically writable
+     *
+     * @throws RuntimeException if option if not writable
+     * @return true
+     */
+    public boolean checkIsWritable() throws RuntimeException {
+        return DynamicVMOptionChecker.checkIsWritable(this.name);
+    }
+
+    /**
+     * Checks that value for this VM option cannot be set
+     *
+     * @param value to check
+     * @throws RuntimeException on error - when expected exception hasn't been thrown
+     */
+    public void checkInvalidValue(String value) {
+        DynamicVMOptionChecker.checkInvalidValue(this.name, value);
+    }
+
+    /**
+     * Checks that value for this VM option can be set
+     *
+     * @param value to check
+     * @throws RuntimeException on error - when value in VM is not equal to origin
+     */
+    public void checkValidValue(String value) {
+        DynamicVMOptionChecker.checkValidValue(this.name, value);
+    }
+
+}
--- a/test/testlibrary/whitebox/sun/hotspot/WhiteBox.java	Tue Mar 25 12:54:21 2014 -0700
+++ b/test/testlibrary/whitebox/sun/hotspot/WhiteBox.java	Tue Mar 25 17:07:36 2014 -0700
@@ -150,4 +150,7 @@
   public native void runMemoryUnitTests();
   public native void readFromNoaccessArea();

+  // CPU features
+  public native String getCPUFeatures();
+
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/testlibrary/whitebox/sun/hotspot/cpuinfo/CPUInfo.java	Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.hotspot.cpuinfo;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+
+import sun.hotspot.WhiteBox;
+
+/**
+ * Information about CPU on test box.
+ *
+ * CPUInfo uses WhiteBox to gather information,
+ * so WhiteBox class should be added to bootclasspath
+ * and option -XX:+WhiteBoxAPI should expclicetly
+ * specified on command line.
+ */
+public class CPUInfo {
+
+    private static final List<String> features;
+    private static final String additionalCPUInfo;
+
+    static {
+        WhiteBox wb = WhiteBox.getWhiteBox();
+
+        Pattern additionalCPUInfoRE =
+            Pattern.compile("([^(]*\\([^)]*\\)[^,]*),\\s*");
+
+        String cpuFeaturesString = wb.getCPUFeatures();
+        Matcher matcher = additionalCPUInfoRE.matcher(cpuFeaturesString);
+        if (matcher.find()) {
+            additionalCPUInfo = matcher.group(1);
+        } else {
+            additionalCPUInfo = "";
+        }
+        String splittedFeatures[] = matcher.replaceAll("").split("(, )| ");
+
+        features = Collections.unmodifiableList(Arrays.
+                                                asList(splittedFeatures));
+    }
+
+    /**
+     * Get additional information about CPU.
+     * For example, on X86 in will be family/model/stepping
+     * and number of cores.
+     *
+     * @return additional CPU info
+     */
+    public static String getAdditionalCPUInfo() {
+        return additionalCPUInfo;
+    }
+
+    /**
+     * Get all known features supported by CPU.
+     *
+     * @return unmodifiable list with names of all known features
+     *         supported by CPU.
+     */
+    public static List<String> getFeatures() {
+        return features;
+    }
+
+    /**
+     * Check if some feature is supported by CPU.
+     *
+     * @param feature Name of feature to be tested.
+     * @return <b>true</b> if tested feature is supported by CPU.
+     */
+    public static boolean hasFeature(String feature) {
+        return features.contains(feature.toLowerCase());
+    }
+}